mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-06-16 15:54:10 +00:00
fix(webui): render TeX math delimiters
This commit is contained in:
parent
0a396aa6e2
commit
5bd4a83e85
@ -24,6 +24,7 @@ import {
|
|||||||
} from "@/components/FileReferenceChip";
|
} from "@/components/FileReferenceChip";
|
||||||
import { inferMediaKind } from "@/lib/media";
|
import { inferMediaKind } from "@/lib/media";
|
||||||
import { faviconUrls } from "@/lib/provider-brand";
|
import { faviconUrls } from "@/lib/provider-brand";
|
||||||
|
import { remarkTexMath } from "@/lib/remark-tex-math";
|
||||||
import { cn } from "@/lib/utils";
|
import { cn } from "@/lib/utils";
|
||||||
|
|
||||||
import "katex/dist/katex.min.css";
|
import "katex/dist/katex.min.css";
|
||||||
@ -190,6 +191,7 @@ const remarkPlugins: NonNullable<ReactMarkdownOptions["remarkPlugins"]> = [
|
|||||||
remarkBreaks,
|
remarkBreaks,
|
||||||
remarkGfm,
|
remarkGfm,
|
||||||
[remarkMath, { singleDollarTextMath: false }],
|
[remarkMath, { singleDollarTextMath: false }],
|
||||||
|
remarkTexMath,
|
||||||
remarkSafeHtmlSubset,
|
remarkSafeHtmlSubset,
|
||||||
];
|
];
|
||||||
const rehypePlugins: NonNullable<ReactMarkdownOptions["rehypePlugins"]> = [rehypeKatex];
|
const rehypePlugins: NonNullable<ReactMarkdownOptions["rehypePlugins"]> = [rehypeKatex];
|
||||||
|
|||||||
297
webui/src/lib/remark-tex-math.ts
Normal file
297
webui/src/lib/remark-tex-math.ts
Normal file
@ -0,0 +1,297 @@
|
|||||||
|
import type { Root } from "mdast";
|
||||||
|
import type { Code, Construct, Effects, Extension, State, Token } from "micromark-util-types";
|
||||||
|
import type { Plugin } from "unified";
|
||||||
|
import type {} from "micromark-extension-math";
|
||||||
|
|
||||||
|
// Character codes of the ASCII characters the tokenizers below compare against,
// listed in ascending code order.
const SPACE = 32; // " "
const DOLLAR = 36; // "$"
const LEFT_PAREN = 40; // "("
const RIGHT_PAREN = 41; // ")"
const PLUS = 43; // "+"
const SLASH = 47; // "/"
const LESS_THAN = 60; // "<"
const EQUALS = 61; // "="
const GREATER_THAN = 62; // ">"
const LEFT_BRACKET = 91; // "["
const BACKSLASH = 92; // "\"
const RIGHT_BRACKET = 93; // "]"
const CARET = 94; // "^"
const UNDERSCORE = 95; // "_"
const LEFT_BRACE = 123; // "{"
const PIPE = 124; // "|"
const RIGHT_BRACE = 125; // "}"
|
||||||
|
|
||||||
|
/** The slice of the processor's data store that this plugin reads and writes. */
interface ProcessorData {
  /** micromark syntax extensions collected on the processor; absent until one is added. */
  micromarkExtensions?: Extension[];
}
|
||||||
|
|
||||||
|
/**
 * micromark syntax extension wiring the tokenizers in this file to their trigger
 * characters: a backslash can start display math in flow content or inline math in
 * text content, and a dollar sign can start guarded inline math in text content.
 */
const texMathSyntax: Extension = {
  text: {
    [DOLLAR]: {
      name: "guardedDollarMathText",
      tokenize: tokenizeGuardedDollarMathText,
    },
    [BACKSLASH]: {
      name: "texMathText",
      tokenize: tokenizeTexMathText,
    },
  },
  flow: {
    [BACKSLASH]: {
      name: "texMathFlow",
      concrete: true,
      tokenize: tokenizeTexMathFlow,
    },
  },
};
|
||||||
|
|
||||||
|
/**
 * remark plugin that appends the TeX math syntax extension to the processor's
 * `micromarkExtensions` list, creating the list when it does not exist yet.
 */
export const remarkTexMath: Plugin<[], Root> = function remarkTexMath() {
  const processorData = this.data() as ProcessorData;

  if (!processorData.micromarkExtensions) {
    processorData.micromarkExtensions = [];
  }

  processorData.micromarkExtensions.push(texMathSyntax);
};
|
||||||
|
|
||||||
|
/** Returns true when `code` is one of the three codes -5, -4 or -3 treated here as line endings. */
function isLineEnding(code: Code): boolean {
  switch (code) {
    case -5:
    case -4:
    case -3:
      return true;
    default:
      return false;
  }
}
|
||||||
|
|
||||||
|
/** Returns true when `code` is the character code of an ASCII digit `0`-`9` (48-57). */
function isDigit(code: Code): boolean {
  if (code === null) {
    return false;
  }

  return 48 <= code && code <= 57;
}
|
||||||
|
|
||||||
|
/**
 * Returns true when the code right after an opening `$` rules out inline math:
 * end of input, another `$`, a space, or a line ending.
 */
function isOpeningDollarBlocked(code: Code): boolean {
  if (code === null) return true;
  if (code === DOLLAR || code === SPACE) return true;
  return isLineEnding(code);
}
|
||||||
|
|
||||||
|
/**
 * Returns true when `code` is one of the characters counted as evidence that a
 * `$...$` span is a formula: `\ ^ _ = + / < > { } |`.
 */
function isMathSignal(code: Code): boolean {
  switch (code) {
    case BACKSLASH:
    case CARET:
    case UNDERSCORE:
    case EQUALS:
    case PLUS:
    case SLASH:
    case LESS_THAN:
    case GREATER_THAN:
    case LEFT_BRACE:
    case RIGHT_BRACE:
    case PIPE:
      return true;
    default:
      return false;
  }
}
|
||||||
|
|
||||||
|
/** Partial construct that recognizes the closing `\]` fence of a display-math block. */
const texMathFlowClose: Construct = {
  partial: true,
  tokenize: tokenizeTexMathFlowClose,
};
|
||||||
|
|
||||||
|
// Models usually write inline math as `$...$`, but a span made only of numbers is far
// more likely to be a price than a formula, so single-dollar spans are accepted only
// when they pass the guards checked after the closing `$`.
function tokenizeGuardedDollarMathText(effects: Effects, ok: State, nok: State): State {
  // Facts gathered about the span's content while it is consumed.
  let leadingCode: Code = null;
  let trailingCode: Code = null;
  let sawContent = false;
  let sawMathSignal = false;

  return atOpeningDollar;

  // Consume the opening `$` as the start of a math-text sequence.
  function atOpeningDollar(code: Code): State | undefined {
    effects.enter("mathText");
    effects.enter("mathTextSequence");
    effects.consume(code);
    return afterOpeningDollar;
  }

  // Reject immediately when the character after `$` cannot begin a formula.
  function afterOpeningDollar(code: Code): State | undefined {
    if (isOpeningDollarBlocked(code)) {
      return nok(code);
    }

    effects.exit("mathTextSequence");
    effects.enter("mathTextData");
    return insideSpan(code);
  }

  // Consume content until the closing `$`; the span may not cross a line ending.
  function insideSpan(code: Code): State | undefined {
    if (code === null || isLineEnding(code)) {
      return abandonSpan(code);
    }

    if (code !== DOLLAR) {
      track(code);
      return code === BACKSLASH ? afterBackslash : insideSpan;
    }

    effects.exit("mathTextData");
    effects.enter("mathTextSequence");
    effects.consume(code);
    effects.exit("mathTextSequence");
    effects.exit("mathText");
    return afterClosingDollar;
  }

  // The character after a backslash is always content, so an escaped `$` does not close.
  function afterBackslash(code: Code): State | undefined {
    if (code === null || isLineEnding(code)) {
      return abandonSpan(code);
    }

    track(code);
    return insideSpan;
  }

  // Apply the guards: content must exist and not end in a space, and a span that
  // starts with a digit must contain at least one math signal.
  function afterClosingDollar(code: Code): State | undefined {
    const badEnding = !sawContent || trailingCode === SPACE;
    const looksLikePrice = isDigit(leadingCode) && !sawMathSignal;
    return badEnding || looksLikePrice ? nok(code) : ok(code);
  }

  // Close the open data token and fail the attempt.
  function abandonSpan(code: Code): State | undefined {
    effects.exit("mathTextData");
    return nok(code);
  }

  // Record what the guards need about `code`, then consume it.
  function track(code: Code): void {
    if (leadingCode === null) {
      leadingCode = code;
    }
    sawContent = true;
    if (!sawMathSignal) {
      sawMathSignal = isMathSignal(code);
    }
    trailingCode = code;
    effects.consume(code);
  }
}
|
||||||
|
|
||||||
|
/**
 * Tokenizes inline math delimited by `\(` and `\)` as a `mathText` token containing
 * `mathTextSequence` tokens for the delimiters and `mathTextData` tokens for the content.
 *
 * A backslash inside the content that is not followed by `)` is ordinary content (for
 * example the start of `\frac`); reaching the end of input before `\)` fails the attempt.
 *
 * The `mathTextData` token is opened only once there is a content character to put in
 * it, so `\(\)` and content that begins with a backslash never produce a zero-length
 * token.
 * NOTE(review): micromark's development build is understood to assert that exited
 * tokens are non-empty — confirm against the installed micromark version.
 *
 * @param effects micromark effects used to enter, exit and consume.
 * @param ok State to continue with after a complete `\(...\)` span.
 * @param nok State to continue with when this is not inline math.
 * @returns The initial state, which expects the opening backslash.
 */
function tokenizeTexMathText(effects: Effects, ok: State, nok: State): State {
  // The token opened for a backslash that might start the closing `\)`.
  let closeSequence: Token | undefined;

  return start;

  function start(code: Code): State | undefined {
    effects.enter("mathText");
    effects.enter("mathTextSequence");
    effects.consume(code);
    return open;
  }

  function open(code: Code): State | undefined {
    if (code !== LEFT_PAREN) return nok(code);

    effects.consume(code);
    effects.exit("mathTextSequence");
    return contentStart;
  }

  // First code after `\(`: no data token is open yet.
  function contentStart(code: Code): State | undefined {
    if (code === null) return nok(code);
    if (code === BACKSLASH) return closeStart(code);

    effects.enter("mathTextData");
    return data(code);
  }

  // Inside an open, soon-to-be non-empty data token.
  function data(code: Code): State | undefined {
    if (code === null) {
      effects.exit("mathTextData");
      return nok(code);
    }

    if (code === BACKSLASH) {
      effects.exit("mathTextData");
      return closeStart(code);
    }

    effects.consume(code);
    return data;
  }

  // Tentatively treat a backslash as the start of the closing sequence.
  function closeStart(code: Code): State | undefined {
    closeSequence = effects.enter("mathTextSequence");
    effects.consume(code);
    return close;
  }

  function close(code: Code): State | undefined {
    if (code === RIGHT_PAREN) {
      effects.consume(code);
      effects.exit("mathTextSequence");
      effects.exit("mathText");
      return ok;
    }

    // Not `\)`: the backslash was content after all. Retype the still-open sequence
    // token as data so it keeps collecting content and is exited as `mathTextData`.
    if (closeSequence) closeSequence.type = "mathTextData";
    return data(code);
  }
}
|
||||||
|
|
||||||
|
/**
 * Tokenizes a display-math block delimited by `\[` and `\]` as a `mathFlow` token.
 *
 * The opening `\[` is emitted as a `mathFlowFence`, each run of content on a line as a
 * `mathFlowValue`, line breaks as `lineEnding` tokens, and the closing `\]` through the
 * partial `texMathFlowClose` construct. A backslash that does not start `\]` is content.
 * Reaching the end of input before `\]` fails the attempt.
 *
 * After the closing fence only spaces/tabs followed by a line ending or the end of
 * input are accepted; anything else fails the attempt so the line is parsed as
 * ordinary content instead.
 * NOTE(review): micromark's flow initializer is understood to consume whatever code
 * follows a finished flow construct as a line ending (and to assert eol/eof in its
 * development build), so accepting `\[x\]y` would swallow the `y` — confirm against
 * the installed micromark version.
 *
 * @param effects micromark effects used to enter, exit, consume and attempt.
 * @param ok State to continue with after a complete block.
 * @param nok State to continue with when this is not a display-math block.
 * @returns The initial state, which expects the opening backslash.
 */
function tokenizeTexMathFlow(effects: Effects, ok: State, nok: State): State {
  return start;

  function start(code: Code): State | undefined {
    effects.enter("mathFlow");
    effects.enter("mathFlowFence");
    effects.enter("mathFlowFenceSequence");
    effects.consume(code);
    return open;
  }

  function open(code: Code): State | undefined {
    if (code !== LEFT_BRACKET) return nok(code);

    effects.consume(code);
    effects.exit("mathFlowFenceSequence");
    effects.exit("mathFlowFence");
    return contentStart;
  }

  // Between values: right after the opening fence or after a line ending.
  function contentStart(code: Code): State | undefined {
    if (code === null) return nok(code);

    if (isLineEnding(code)) {
      effects.enter("lineEnding");
      effects.consume(code);
      effects.exit("lineEnding");
      return contentStart;
    }

    if (code === BACKSLASH) {
      return effects.attempt(texMathFlowClose, done, contentStartAfterBackslash)(code);
    }

    effects.enter("mathFlowValue");
    return content(code);
  }

  // Inside an open `mathFlowValue`.
  function content(code: Code): State | undefined {
    if (code === null) {
      effects.exit("mathFlowValue");
      return nok(code);
    }

    if (isLineEnding(code)) {
      effects.exit("mathFlowValue");
      effects.enter("lineEnding");
      effects.consume(code);
      effects.exit("lineEnding");
      return contentStart;
    }

    if (code === BACKSLASH) {
      // Close the current value before trying the fence; if the fence does not match,
      // the backslash starts a new value.
      effects.exit("mathFlowValue");
      return effects.attempt(texMathFlowClose, done, contentStartAfterBackslash)(code);
    }

    effects.consume(code);
    return content;
  }

  // The backslash was not a closing fence: consume it as the first code of a value.
  function contentStartAfterBackslash(code: Code): State | undefined {
    effects.enter("mathFlowValue");
    effects.consume(code);
    return content;
  }

  // The closing fence matched; skip optional trailing spaces/tabs.
  function done(code: Code): State | undefined {
    if (isSpaceOrTab(code)) {
      effects.enter("whitespace");
      return trailingWhitespace(code);
    }

    return afterClosingFence(code);
  }

  function trailingWhitespace(code: Code): State | undefined {
    if (isSpaceOrTab(code)) {
      effects.consume(code);
      return trailingWhitespace;
    }

    effects.exit("whitespace");
    return afterClosingFence(code);
  }

  // Only the end of the line or of the input may follow the closing fence.
  function afterClosingFence(code: Code): State | undefined {
    if (code !== null && !isLineEnding(code)) return nok(code);

    effects.exit("mathFlow");
    return ok(code);
  }

  // -2 and -1 are micromark's codes for a horizontal tab and the virtual spaces
  // that pad a tab.
  function isSpaceOrTab(code: Code): boolean {
    return code === SPACE || code === -2 || code === -1;
  }
}
|
||||||
|
|
||||||
|
/**
 * Tokenizes the closing `\]` of a display-math block as a `mathFlowFence` wrapping a
 * `mathFlowFenceSequence`. Expects to be started on the backslash; fails when the
 * next code is not `]`.
 */
function tokenizeTexMathFlowClose(effects: Effects, ok: State, nok: State): State {
  return atBackslash;

  function atBackslash(code: Code): State | undefined {
    effects.enter("mathFlowFence");
    effects.enter("mathFlowFenceSequence");
    effects.consume(code);
    return afterBackslash;
  }

  function afterBackslash(code: Code): State | undefined {
    if (code === RIGHT_BRACKET) {
      effects.consume(code);
      effects.exit("mathFlowFenceSequence");
      effects.exit("mathFlowFence");
      return ok;
    }

    return nok(code);
  }
}
|
||||||
@ -296,6 +296,71 @@ describe("MarkdownTextRenderer", () => {
|
|||||||
expect(container.querySelector(".katex")).toBeNull();
|
expect(container.querySelector(".katex")).toBeNull();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("renders guarded single-dollar inline math", () => {
  // Two spans should become formulas; the digits-only range must stay as text.
  const markdown =
    "Variables $x$ and powers $2^n$ render inline, while a price range $10-20$ stays literal.";
  const { container: root } = render(<MarkdownTextRenderer>{markdown}</MarkdownTextRenderer>);

  const renderedFormulas = root.querySelectorAll(".katex");
  expect(renderedFormulas).toHaveLength(2);
  expect(root).not.toHaveTextContent("$x$");
  expect(root).not.toHaveTextContent("$2^n$");
  expect(root).toHaveTextContent("$10-20$");
});
|
||||||
|
|
||||||
|
it("renders model-style single-dollar formula lists", () => {
  // One list item per formula, each written with single-dollar delimiters.
  const listItems = [
    "- Fourier transform: $\\hat{f}(\\xi) = \\int_{-\\infty}^{+\\infty} f(x)e^{-2\\pi i x \\xi}\\, dx$",
    "- Taylor expansion: $e^x = \\sum_{n=0}^{\\infty} \\frac{x^n}{n!}$",
    "- KL divergence: $D_\\text{KL}(P || Q) = \\sum_x P(x) \\log \\frac{P(x)}{Q(x)}$",
    "- Quantum state: $\\psi = \\alpha|0\\rangle + \\beta|1\\rangle$",
  ];
  const markdown = listItems.join("\n");
  const { container: root } = render(<MarkdownTextRenderer>{markdown}</MarkdownTextRenderer>);

  const renderedFormulas = root.querySelectorAll(".katex");
  expect(renderedFormulas).toHaveLength(4);
  expect(root).not.toHaveTextContent("$\\hat{f}");
  expect(root).not.toHaveTextContent("$D_\\text");
});
|
||||||
|
|
||||||
|
it("renders TeX inline math delimiters", () => {
  const markdown = "Einstein wrote \\(E = mc^2\\) for mass-energy equivalence.";
  const { container: root } = render(<MarkdownTextRenderer>{markdown}</MarkdownTextRenderer>);

  // The span renders as inline (not display) math and the delimiters disappear.
  expect(root.querySelector(".katex")).toBeInTheDocument();
  expect(root.querySelector(".katex-display")).toBeNull();
  expect(root).not.toHaveTextContent("\\(");
  expect(root).not.toHaveTextContent("\\)");
});
|
||||||
|
|
||||||
|
it("renders TeX display math delimiters", () => {
  const markdown = "\\[x^2 + y^2 = z^2\\]";
  const { container: root } = render(<MarkdownTextRenderer>{markdown}</MarkdownTextRenderer>);

  // The block renders as display math and the delimiters disappear.
  expect(root.querySelector(".katex-display")).toBeInTheDocument();
  expect(root).not.toHaveTextContent("\\[");
  expect(root).not.toHaveTextContent("\\]");
});
|
||||||
|
|
||||||
|
it("keeps TeX delimiters inside code literal", () => {
  // An inline code span and a fenced block, both containing TeX delimiters.
  const markdown = "Inline `\\(x\\)` stays literal.\n\n```text\n\\[x^2\\]\n```";
  const { container: root } = render(
    <MarkdownTextRenderer highlightCode={false}>{markdown}</MarkdownTextRenderer>,
  );

  expect(root.querySelector(".katex")).toBeNull();

  const inlineCode = screen.getByText("\\(x\\)");
  expect(inlineCode.tagName).toBe("CODE");
  expect(screen.getByText("\\[x^2\\]")).toBeInTheDocument();
});
|
||||||
|
|
||||||
it("still renders explicit math blocks", () => {
|
it("still renders explicit math blocks", () => {
|
||||||
const { container } = render(
|
const { container } = render(
|
||||||
<MarkdownTextRenderer>{"$$x^2 + y^2 = z^2$$"}</MarkdownTextRenderer>,
|
<MarkdownTextRenderer>{"$$x^2 + y^2 = z^2$$"}</MarkdownTextRenderer>,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user