From 5bd4a83e85081c02671300cb82a15efe0892fba8 Mon Sep 17 00:00:00 2001
From: chengyongru
Date: Mon, 8 Jun 2026 17:31:10 +0800
Subject: [PATCH] fix(webui): render TeX math delimiters

---
 webui/src/components/MarkdownTextRenderer.tsx |   2 +
 webui/src/lib/remark-tex-math.ts              | 376 ++++++++++++++++++
 .../src/tests/markdown-text-renderer.test.tsx |  65 ++++
 3 files changed, 443 insertions(+)
 create mode 100644 webui/src/lib/remark-tex-math.ts

diff --git a/webui/src/components/MarkdownTextRenderer.tsx b/webui/src/components/MarkdownTextRenderer.tsx
index 307a45f16..3d9c525fa 100644
--- a/webui/src/components/MarkdownTextRenderer.tsx
+++ b/webui/src/components/MarkdownTextRenderer.tsx
@@ -24,6 +24,7 @@ import {
 } from "@/components/FileReferenceChip";
 import { inferMediaKind } from "@/lib/media";
 import { faviconUrls } from "@/lib/provider-brand";
+import { remarkTexMath } from "@/lib/remark-tex-math";
 import { cn } from "@/lib/utils";
 
 import "katex/dist/katex.min.css";
@@ -190,6 +191,7 @@ const remarkPlugins: NonNullable = [
   remarkBreaks,
   remarkGfm,
   [remarkMath, { singleDollarTextMath: false }],
+  remarkTexMath,
   remarkSafeHtmlSubset,
 ];
 const rehypePlugins: NonNullable = [rehypeKatex];
diff --git a/webui/src/lib/remark-tex-math.ts b/webui/src/lib/remark-tex-math.ts
new file mode 100644
index 000000000..c9a304357
--- /dev/null
+++ b/webui/src/lib/remark-tex-math.ts
@@ -0,0 +1,376 @@
+import type { Root } from "mdast";
+import type { Code, Construct, Effects, Extension, State, Token } from "micromark-util-types";
+import type { Plugin } from "unified";
+import type {} from "micromark-extension-math";
+
+// micromark character codes: code points for ordinary characters, negative
+// values for the tab characters emitted by its preprocessor.
+const HORIZONTAL_TAB = -2;
+const VIRTUAL_SPACE = -1;
+const BACKSLASH = 92;
+const DOLLAR = 36;
+const LEFT_PAREN = 40;
+const RIGHT_PAREN = 41;
+const LEFT_BRACKET = 91;
+const RIGHT_BRACKET = 93;
+const SPACE = 32;
+const CARET = 94;
+const UNDERSCORE = 95;
+const EQUALS = 61;
+const PLUS = 43;
+const SLASH = 47;
+const LESS_THAN = 60;
+const GREATER_THAN = 62;
+const LEFT_BRACE = 123;
+const RIGHT_BRACE = 125;
+const PIPE = 124;
+
+type ProcessorData = {
+  micromarkExtensions?: Extension[];
+};
+
+/**
+ * Syntax extension that tokenizes `\[...\]` as display math and `\(...\)` or
+ * guarded `$...$` as inline math, emitting `mathFlow*` / `mathText*` tokens.
+ */
+const texMathSyntax: Extension = {
+  flow: {
+    [BACKSLASH]: {
+      tokenize: tokenizeTexMathFlow,
+      concrete: true,
+      name: "texMathFlow",
+    },
+  },
+  text: {
+    [BACKSLASH]: {
+      tokenize: tokenizeTexMathText,
+      name: "texMathText",
+    },
+    [DOLLAR]: {
+      tokenize: tokenizeGuardedDollarMathText,
+      name: "guardedDollarMathText",
+    },
+  },
+};
+
+/** remark plugin that appends `texMathSyntax` to the processor's micromark extensions. */
+export const remarkTexMath: Plugin<[], Root> = function remarkTexMath() {
+  const data = this.data() as ProcessorData;
+  const micromarkExtensions =
+    data.micromarkExtensions || (data.micromarkExtensions = []);
+
+  micromarkExtensions.push(texMathSyntax);
+};
+
+/** Whether `code` is one of micromark's line-ending codes. */
+function isLineEnding(code: Code): boolean {
+  return code === -5 || code === -4 || code === -3;
+}
+
+/** Whether `code` is a space or one of micromark's tab codes. */
+function isInlineWhitespace(code: Code): boolean {
+  return code === SPACE || code === HORIZONTAL_TAB || code === VIRTUAL_SPACE;
+}
+
+/** Whether `code` is an ASCII digit. */
+function isDigit(code: Code): boolean {
+  return code !== null && code >= 48 && code <= 57;
+}
+
+/** Whether the character right after an opening `$` rules out inline math. */
+function isOpeningDollarBlocked(code: Code): boolean {
+  return code === null || code === DOLLAR || code === SPACE || isLineEnding(code);
+}
+
+/** Whether `code` is a character that marks a `$...$` span as a formula. */
+function isMathSignal(code: Code): boolean {
+  return code === BACKSLASH
+    || code === CARET
+    || code === UNDERSCORE
+    || code === EQUALS
+    || code === PLUS
+    || code === SLASH
+    || code === LESS_THAN
+    || code === GREATER_THAN
+    || code === LEFT_BRACE
+    || code === RIGHT_BRACE
+    || code === PIPE;
+}
+
+/** Partial construct that matches the closing `\]` fence of display math. */
+const texMathFlowClose: Construct = {
+  tokenize: tokenizeTexMathFlowClose,
+  partial: true,
+};
+
+/**
+ * Tokenizes guarded single-dollar inline math (`$...$`) within one line.
+ *
+ * Model output commonly uses `$...$`; numeric-only spans are usually prices, not formulas.
+ * A span is rejected when the opening `$` is followed by a space, another `$`,
+ * a line ending, or the end of input, when its content ends with a space, or
+ * when its content starts with a digit and has no math-signal character.
+ */
+function tokenizeGuardedDollarMathText(effects: Effects, ok: State, nok: State): State {
+  let hasMathSignal = false;
+  let hasContent = false;
+  let firstDataCode: Code = null;
+  let previousDataCode: Code = null;
+
+  return start;
+
+  function start(code: Code): State | undefined {
+    effects.enter("mathText");
+    effects.enter("mathTextSequence");
+    effects.consume(code);
+    return open;
+  }
+
+  function open(code: Code): State | undefined {
+    if (isOpeningDollarBlocked(code)) return nok(code);
+
+    effects.exit("mathTextSequence");
+    effects.enter("mathTextData");
+    return data(code);
+  }
+
+  function data(code: Code): State | undefined {
+    if (code === null || isLineEnding(code)) {
+      effects.exit("mathTextData");
+      return nok(code);
+    }
+
+    if (code === DOLLAR) {
+      effects.exit("mathTextData");
+      effects.enter("mathTextSequence");
+      effects.consume(code);
+      effects.exit("mathTextSequence");
+      effects.exit("mathText");
+      return close;
+    }
+
+    consumeData(code);
+    return code === BACKSLASH ? escaped : data;
+  }
+
+  function escaped(code: Code): State | undefined {
+    if (code === null || isLineEnding(code)) {
+      effects.exit("mathTextData");
+      return nok(code);
+    }
+
+    consumeData(code);
+    return data;
+  }
+
+  function close(code: Code): State | undefined {
+    if (!hasContent || previousDataCode === SPACE) return nok(code);
+    if (isDigit(firstDataCode) && !hasMathSignal) return nok(code);
+    return ok(code);
+  }
+
+  function consumeData(code: Code): void {
+    firstDataCode ??= code;
+    hasContent = true;
+    hasMathSignal ||= isMathSignal(code);
+    previousDataCode = code;
+    effects.consume(code);
+  }
+}
+
+/**
+ * Tokenizes TeX inline math written as `\(...\)`.
+ *
+ * A `mathTextData` token is only opened once there is a character to put in
+ * it, so content that starts with a backslash (`\(\alpha\)`) or is empty
+ * (`\(\)`) never yields a zero-length token, which micromark's development
+ * build rejects.
+ */
+function tokenizeTexMathText(effects: Effects, ok: State, nok: State): State {
+  let closeSequence: Token | undefined;
+
+  return start;
+
+  function start(code: Code): State | undefined {
+    effects.enter("mathText");
+    effects.enter("mathTextSequence");
+    effects.consume(code);
+    return open;
+  }
+
+  function open(code: Code): State | undefined {
+    if (code !== LEFT_PAREN) return nok(code);
+
+    effects.consume(code);
+    effects.exit("mathTextSequence");
+    return between;
+  }
+
+  // No data token is open here: right after `\(` or right after a data run.
+  function between(code: Code): State | undefined {
+    if (code === null) return nok(code);
+
+    if (code === BACKSLASH) {
+      // Tentatively a closing sequence; `close` retypes it if no `)` follows.
+      closeSequence = effects.enter("mathTextSequence");
+      effects.consume(code);
+      return close;
+    }
+
+    effects.enter("mathTextData");
+    effects.consume(code);
+    return data;
+  }
+
+  function data(code: Code): State | undefined {
+    if (code === null) {
+      effects.exit("mathTextData");
+      return nok(code);
+    }
+
+    if (code === BACKSLASH) {
+      effects.exit("mathTextData");
+      return between(code);
+    }
+
+    effects.consume(code);
+    return data;
+  }
+
+  function close(code: Code): State | undefined {
+    if (code === RIGHT_PAREN) {
+      effects.consume(code);
+      effects.exit("mathTextSequence");
+      effects.exit("mathText");
+      return ok;
+    }
+
+    // Not a closer: the backslash is math content (e.g. `\frac`), keep it as data.
+    if (closeSequence) closeSequence.type = "mathTextData";
+    return data(code);
+  }
+}
+
+/**
+ * Tokenizes TeX display math written as `\[...\]`, possibly spanning lines.
+ *
+ * Flow constructs have to end at a line ending or the end of input, so after
+ * the closing `\]` only spaces or tabs may remain on the line. Anything else
+ * rejects the construct and the text is parsed as ordinary markdown instead
+ * of handing a non-line-ending character back to the flow tokenizer.
+ */
+function tokenizeTexMathFlow(effects: Effects, ok: State, nok: State): State {
+  return start;
+
+  function start(code: Code): State | undefined {
+    effects.enter("mathFlow");
+    effects.enter("mathFlowFence");
+    effects.enter("mathFlowFenceSequence");
+    effects.consume(code);
+    return open;
+  }
+
+  function open(code: Code): State | undefined {
+    if (code !== LEFT_BRACKET) return nok(code);
+
+    effects.consume(code);
+    effects.exit("mathFlowFenceSequence");
+    effects.exit("mathFlowFence");
+    return contentStart;
+  }
+
+  function contentStart(code: Code): State | undefined {
+    if (code === null) return nok(code);
+
+    if (isLineEnding(code)) {
+      effects.enter("lineEnding");
+      effects.consume(code);
+      effects.exit("lineEnding");
+      return contentStart;
+    }
+
+    if (code === BACKSLASH) {
+      return effects.attempt(texMathFlowClose, done, contentStartAfterBackslash)(code);
+    }
+
+    effects.enter("mathFlowValue");
+    return content(code);
+  }
+
+  function content(code: Code): State | undefined {
+    if (code === null) {
+      effects.exit("mathFlowValue");
+      return nok(code);
+    }
+
+    if (isLineEnding(code)) {
+      effects.exit("mathFlowValue");
+      effects.enter("lineEnding");
+      effects.consume(code);
+      effects.exit("lineEnding");
+      return contentStart;
+    }
+
+    if (code === BACKSLASH) {
+      effects.exit("mathFlowValue");
+      return effects.attempt(texMathFlowClose, done, contentStartAfterBackslash)(code);
+    }
+
+    effects.consume(code);
+    return content;
+  }
+
+  // The backslash did not start `\]`, so it is ordinary math content.
+  function contentStartAfterBackslash(code: Code): State | undefined {
+    effects.enter("mathFlowValue");
+    effects.consume(code);
+    return content;
+  }
+
+  // The closing fence matched; allow trailing spaces or tabs before the line end.
+  function done(code: Code): State | undefined {
+    if (!isInlineWhitespace(code)) return atLineEnd(code);
+
+    effects.enter("whitespace");
+    return trailingWhitespace(code);
+  }
+
+  function trailingWhitespace(code: Code): State | undefined {
+    if (isInlineWhitespace(code)) {
+      effects.consume(code);
+      return trailingWhitespace;
+    }
+
+    effects.exit("whitespace");
+    return atLineEnd(code);
+  }
+
+  // Accept the block only when nothing but the line end follows the fence.
+  function atLineEnd(code: Code): State | undefined {
+    if (code !== null && !isLineEnding(code)) return nok(code);
+
+    effects.exit("mathFlow");
+    return ok(code);
+  }
+}
+
+/** Tokenizes the closing `\]` fence; fails on any other backslash sequence. */
+function tokenizeTexMathFlowClose(effects: Effects, ok: State, nok: State): State {
+  return start;
+
+  function start(code: Code): State | undefined {
+    effects.enter("mathFlowFence");
+    effects.enter("mathFlowFenceSequence");
+    effects.consume(code);
+    return close;
+  }
+
+  function close(code: Code): State | undefined {
+    if (code !== RIGHT_BRACKET) return nok(code);
+
+    effects.consume(code);
+    effects.exit("mathFlowFenceSequence");
+    effects.exit("mathFlowFence");
+    return ok;
+  }
+}
diff --git 
a/webui/src/tests/markdown-text-renderer.test.tsx b/webui/src/tests/markdown-text-renderer.test.tsx
index 4d5972ace..9543a6bda 100644
--- a/webui/src/tests/markdown-text-renderer.test.tsx
+++ b/webui/src/tests/markdown-text-renderer.test.tsx
@@ -296,6 +296,71 @@ describe("MarkdownTextRenderer", () => {
     expect(container.querySelector(".katex")).toBeNull();
   });
 
+  it("renders guarded single-dollar inline math", () => { // `$x$` and `$2^n$` become math; the numeric range `$10-20$` must stay text
+    const { container } = render( // NOTE(review): the JSX element wrapping this child looks stripped from this copy of the patch; confirm against the commit
+
+        {
+          "Variables $x$ and powers $2^n$ render inline, while a price range $10-20$ stays literal."
+        }
+      ,
+    );
+
+    expect(container.querySelectorAll(".katex")).toHaveLength(2); // exactly the two formulas
+    expect(container).not.toHaveTextContent("$x$");
+    expect(container).not.toHaveTextContent("$2^n$");
+    expect(container).toHaveTextContent("$10-20$");
+  });
+
+  it("renders model-style single-dollar formula lists", () => { // one `$...$` formula per list item, each containing backslash commands
+    const { container } = render(
+
+        {[
+          "- Fourier transform: $\\hat{f}(\\xi) = \\int_{-\\infty}^{+\\infty} f(x)e^{-2\\pi i x \\xi}\\, dx$",
+          "- Taylor expansion: $e^x = \\sum_{n=0}^{\\infty} \\frac{x^n}{n!}$",
+          "- KL divergence: $D_\\text{KL}(P || Q) = \\sum_x P(x) \\log \\frac{P(x)}{Q(x)}$",
+          "- Quantum state: $\\psi = \\alpha|0\\rangle + \\beta|1\\rangle$",
+        ].join("\n")}
+      ,
+    );
+
+    expect(container.querySelectorAll(".katex")).toHaveLength(4); // one rendered formula per list item
+    expect(container).not.toHaveTextContent("$\\hat{f}");
+    expect(container).not.toHaveTextContent("$D_\\text");
+  });
+
+  it("renders TeX inline math delimiters", () => { // `\(...\)` renders as inline math, not display math
+    const { container } = render(
+      {"Einstein wrote \\(E = mc^2\\) for mass-energy equivalence."},
+    );
+
+    expect(container.querySelector(".katex")).toBeInTheDocument();
+    expect(container.querySelector(".katex-display")).toBeNull();
+    expect(container).not.toHaveTextContent("\\(");
+    expect(container).not.toHaveTextContent("\\)");
+  });
+
+  it("renders TeX display math delimiters", () => { // `\[...\]` alone on a line renders as display math
+    const { container } = render(
+      {"\\[x^2 + y^2 = z^2\\]"},
+    );
+
+    expect(container.querySelector(".katex-display")).toBeInTheDocument();
+    expect(container).not.toHaveTextContent("\\[");
+    expect(container).not.toHaveTextContent("\\]");
+  });
+
+  it("keeps TeX delimiters inside code literal", () => { // delimiters inside inline code and fenced code are not math
+    const { container } = render(
+
+        {"Inline `\\(x\\)` stays literal.\n\n```text\n\\[x^2\\]\n```"}
+      ,
+    );
+
+    expect(container.querySelector(".katex")).toBeNull();
+    expect(screen.getByText("\\(x\\)").tagName).toBe("CODE");
+    expect(screen.getByText("\\[x^2\\]")).toBeInTheDocument();
+  });
+
   it("still renders explicit math blocks", () => {
     const { container } = render(
       {"$$x^2 + y^2 = z^2$$"},