mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-06-15 07:14:08 +00:00
fix(webui): render TeX math delimiters
This commit is contained in:
parent
0a396aa6e2
commit
5bd4a83e85
@ -24,6 +24,7 @@ import {
|
||||
} from "@/components/FileReferenceChip";
|
||||
import { inferMediaKind } from "@/lib/media";
|
||||
import { faviconUrls } from "@/lib/provider-brand";
|
||||
import { remarkTexMath } from "@/lib/remark-tex-math";
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
import "katex/dist/katex.min.css";
|
||||
@ -190,6 +191,7 @@ const remarkPlugins: NonNullable<ReactMarkdownOptions["remarkPlugins"]> = [
|
||||
remarkBreaks,
|
||||
remarkGfm,
|
||||
[remarkMath, { singleDollarTextMath: false }],
|
||||
remarkTexMath,
|
||||
remarkSafeHtmlSubset,
|
||||
];
|
||||
const rehypePlugins: NonNullable<ReactMarkdownOptions["rehypePlugins"]> = [rehypeKatex];
|
||||
|
||||
297
webui/src/lib/remark-tex-math.ts
Normal file
297
webui/src/lib/remark-tex-math.ts
Normal file
@ -0,0 +1,297 @@
|
||||
import type { Root } from "mdast";
|
||||
import type { Code, Construct, Effects, Extension, State, Token } from "micromark-util-types";
|
||||
import type { Plugin } from "unified";
|
||||
import type {} from "micromark-extension-math";
|
||||
|
||||
// Character codes compared against micromark `Code` values by the tokenizers below.

// Delimiter characters.
const BACKSLASH = 92; // backslash
const DOLLAR = 36; // "$"
const LEFT_PAREN = 40; // "("
const RIGHT_PAREN = 41; // ")"
const LEFT_BRACKET = 91; // "["
const RIGHT_BRACKET = 93; // "]"

// Whitespace.
const SPACE = 32; // " "

// Characters that mark a `$...$` span as a formula (see `isMathSignal`).
const CARET = 94; // "^"
const UNDERSCORE = 95; // "_"
const EQUALS = 61; // "="
const PLUS = 43; // "+"
const SLASH = 47; // "/"
const LESS_THAN = 60; // "<"
const GREATER_THAN = 62; // ">"
const LEFT_BRACE = 123; // "{"
const RIGHT_BRACE = 125; // "}"
const PIPE = 124; // "|"
|
||||
|
||||
/**
 * The slice of the unified processor's data store that this plugin touches:
 * the optional list of micromark syntax extensions.
 */
type ProcessorData = { micromarkExtensions?: Extension[] };
|
||||
|
||||
/**
 * micromark syntax extension registered by `remarkTexMath`.
 *
 * - text content, on `$`: guarded single-dollar inline math.
 * - text content, on a backslash: TeX inline math (`\( ... \)`).
 * - flow content, on a backslash: TeX display math (`\[ ... \]`).
 */
const texMathSyntax: Extension = {
  text: {
    [DOLLAR]: {
      name: "guardedDollarMathText",
      tokenize: tokenizeGuardedDollarMathText,
    },
    [BACKSLASH]: {
      name: "texMathText",
      tokenize: tokenizeTexMathText,
    },
  },
  flow: {
    [BACKSLASH]: {
      name: "texMathFlow",
      concrete: true,
      tokenize: tokenizeTexMathFlow,
    },
  },
};
|
||||
|
||||
/**
 * remark plugin that registers `texMathSyntax` with the processor.
 *
 * It appends the extension to the `micromarkExtensions` list kept in the
 * processor's data, creating that list first when it does not exist yet.
 */
export const remarkTexMath: Plugin<[], Root> = function remarkTexMath() {
  const processorData = this.data() as ProcessorData;

  (processorData.micromarkExtensions ||= []).push(texMathSyntax);
};
|
||||
|
||||
// micromark represents line endings with negative "virtual" character codes.
const CARRIAGE_RETURN = -5;
const LINE_FEED = -4;
const CARRIAGE_RETURN_LINE_FEED = -3;

/**
 * Report whether `code` is one of micromark's line-ending codes
 * (CR, LF, or the combined CRLF code).
 *
 * @param code Character code handed to a tokenizer state; `null` means end of input.
 * @returns `true` only for the three line-ending codes; `false` for `null` and everything else.
 */
function isLineEnding(code: Code): boolean {
  switch (code) {
    case CARRIAGE_RETURN:
    case LINE_FEED:
    case CARRIAGE_RETURN_LINE_FEED:
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
// Inclusive bounds of the ASCII digit range.
const DIGIT_ZERO = 48; // "0"
const DIGIT_NINE = 57; // "9"

/**
 * Report whether `code` is an ASCII digit (`0`-`9`).
 *
 * @param code Character code; `null` (end of input) is never a digit.
 * @returns `true` when the code lies in the inclusive range 48..57.
 */
function isDigit(code: Code): boolean {
  if (code === null) {
    return false;
  }

  return DIGIT_ZERO <= code && code <= DIGIT_NINE;
}
|
||||
|
||||
// micromark expands a tab into a horizontal-tab code followed by virtual spaces.
const HORIZONTAL_TAB = -2;
const VIRTUAL_SPACE = -1;

/**
 * Decide whether the character right after an opening `$` rules out inline math.
 *
 * The opening dollar is rejected when it is followed by end of input, another
 * `$`, whitespace (space, or a tab as encoded by micromark), or a line ending.
 *
 * @param code Character code that follows the opening `$`.
 * @returns `true` when a math span must not start here.
 */
function isOpeningDollarBlocked(code: Code): boolean {
  if (code === null || code === DOLLAR) {
    return true;
  }

  // Treat a tab like a plain space: either one directly after `$` means the
  // dollar sign is not an opening math delimiter.
  if (code === SPACE || code === HORIZONTAL_TAB || code === VIRTUAL_SPACE) {
    return true;
  }

  return isLineEnding(code);
}
|
||||
|
||||
/**
 * Report whether `code` is a character that marks a `$...$` span as a formula.
 *
 * The recognised signals are: backslash, `^`, `_`, `=`, `+`, `/`, `<`, `>`,
 * `{`, `}` and `|`.
 *
 * @param code Character code taken from inside the span.
 * @returns `true` when the code is one of the signal characters.
 */
function isMathSignal(code: Code): boolean {
  switch (code) {
    case BACKSLASH:
    case CARET:
    case UNDERSCORE:
    case EQUALS:
    case PLUS:
    case SLASH:
    case LESS_THAN:
    case GREATER_THAN:
    case LEFT_BRACE:
    case RIGHT_BRACE:
    case PIPE:
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Partial construct for the closing `\]` fence of TeX display math.
 * `tokenizeTexMathFlow` runs it through `effects.attempt` whenever it meets a backslash.
 */
const texMathFlowClose: Construct = {
  partial: true,
  tokenize: tokenizeTexMathFlowClose,
};
|
||||
|
||||
/**
 * Text tokenizer for single-dollar inline math (`$...$`) with guards against
 * false positives.
 *
 * Model output commonly uses `$...$`, but a span that starts with a digit and
 * carries no math signal (for example `$10-20$`) is usually a price, so it is
 * rejected and stays literal text.
 *
 * A span is rejected when:
 * - the opening `$` is followed by a character `isOpeningDollarBlocked` refuses;
 * - end of input or a line ending is reached before the closing `$`;
 * - the character just before the closing `$` is an ASCII space;
 * - the first character is a digit and no `isMathSignal` character was seen.
 *
 * A backslash makes the following character plain data, so an escaped `$`
 * does not close the span.
 *
 * @param effects micromark effects used to open/close tokens and consume codes.
 * @param ok State to continue with when the span is accepted.
 * @param nok State to continue with when the span is rejected.
 * @returns The initial state, which expects the opening `$`.
 */
function tokenizeGuardedDollarMathText(effects: Effects, ok: State, nok: State): State {
  // `null` until the first data character has been consumed.
  let firstDataCode: Code = null;
  let lastDataCode: Code = null;
  let sawMathSignal = false;

  return openingDollar;

  /** Consume the opening `$`. */
  function openingDollar(code: Code): State | undefined {
    effects.enter("mathText");
    effects.enter("mathTextSequence");
    effects.consume(code);
    return afterOpeningDollar;
  }

  /** Check the character after the opening `$` before any data is accepted. */
  function afterOpeningDollar(code: Code): State | undefined {
    if (isOpeningDollarBlocked(code)) {
      return nok(code);
    }

    effects.exit("mathTextSequence");
    effects.enter("mathTextData");
    return inside(code);
  }

  /** Inside the span: stop at `$`, fail at a line ending or EOF, otherwise keep the character. */
  function inside(code: Code): State | undefined {
    if (code === DOLLAR) {
      effects.exit("mathTextData");
      effects.enter("mathTextSequence");
      effects.consume(code);
      effects.exit("mathTextSequence");
      effects.exit("mathText");
      return afterClosingDollar;
    }

    if (code === null || isLineEnding(code)) {
      return abandon(code);
    }

    take(code);
    return code === BACKSLASH ? afterBackslash : inside;
  }

  /** The character after a backslash is always data, even when it is `$`. */
  function afterBackslash(code: Code): State | undefined {
    if (code === null || isLineEnding(code)) {
      return abandon(code);
    }

    take(code);
    return inside;
  }

  /** Apply the content guards once the closing `$` has been consumed. */
  function afterClosingDollar(code: Code): State | undefined {
    const isEmpty = firstDataCode === null;
    // Only an ASCII space is checked here.
    const endsWithSpace = lastDataCode === SPACE;
    const looksLikePrice = isDigit(firstDataCode) && !sawMathSignal;

    if (isEmpty || endsWithSpace || looksLikePrice) {
      return nok(code);
    }

    return ok(code);
  }

  /** Close the open data token and reject the span. */
  function abandon(code: Code): State | undefined {
    effects.exit("mathTextData");
    return nok(code);
  }

  /** Record bookkeeping for one data character and consume it. */
  function take(code: Code): void {
    if (firstDataCode === null) {
      firstDataCode = code;
    }
    if (!sawMathSignal && isMathSignal(code)) {
      sawMathSignal = true;
    }
    lastDataCode = code;
    effects.consume(code);
  }
}
|
||||
|
||||
/**
 * Text tokenizer for TeX inline math written as `\( ... \)`.
 *
 * It emits `mathText` / `mathTextSequence` / `mathTextData` tokens. Every
 * backslash inside the span is provisionally opened as a closing sequence;
 * when the next character is not `)`, that token is relabelled as data and
 * scanning continues. Reaching end of input before `\)` rejects the span.
 *
 * @param effects micromark effects used to open/close tokens and consume codes.
 * @param ok State returned once the closing `\)` has been consumed.
 * @param nok State to continue with when the span is rejected.
 * @returns The initial state, which expects the opening backslash.
 */
function tokenizeTexMathText(effects: Effects, ok: State, nok: State): State {
  // Most recently opened candidate for the closing sequence.
  let pendingCloser: Token | undefined;

  return atBackslash;

  /** Consume the backslash that triggered this construct. */
  function atBackslash(code: Code): State | undefined {
    effects.enter("mathText");
    effects.enter("mathTextSequence");
    effects.consume(code);
    return expectLeftParen;
  }

  /** The opening sequence is only valid when `(` follows the backslash. */
  function expectLeftParen(code: Code): State | undefined {
    if (code === LEFT_PAREN) {
      effects.consume(code);
      effects.exit("mathTextSequence");
      effects.enter("mathTextData");
      return inside;
    }

    return nok(code);
  }

  /** Inside the span: a backslash may start the closer, EOF fails, anything else is data. */
  function inside(code: Code): State | undefined {
    switch (code) {
      case null:
        effects.exit("mathTextData");
        return nok(code);

      case BACKSLASH:
        // NOTE(review): when this backslash directly follows `\(` the data
        // token closed here is empty — confirm that is acceptable downstream.
        effects.exit("mathTextData");
        pendingCloser = effects.enter("mathTextSequence");
        effects.consume(code);
        return expectRightParen;

      default:
        effects.consume(code);
        return inside;
    }
  }

  /** After a backslash inside the span: `)` closes it, anything else was just data. */
  function expectRightParen(code: Code): State | undefined {
    if (code !== RIGHT_PAREN) {
      // Not a closer after all: relabel the still-open token as data and let
      // `inside` handle the current character.
      if (pendingCloser) pendingCloser.type = "mathTextData";
      return inside(code);
    }

    effects.consume(code);
    effects.exit("mathTextSequence");
    effects.exit("mathText");
    return ok;
  }
}
|
||||
|
||||
/**
 * Flow tokenizer for TeX display math written as `\[ ... \]`.
 *
 * It emits a `mathFlow` token holding the opening `mathFlowFence`, then
 * `mathFlowValue` runs separated by `lineEnding` tokens, then the closing
 * fence produced by `texMathFlowClose`. Each backslash in the content first
 * triggers an attempt at the closing fence; when that fails, the backslash
 * starts a new `mathFlowValue`. Reaching end of input before `\]` rejects the
 * whole construct.
 *
 * @param effects micromark effects used to open/close tokens and consume codes.
 * @param ok State to continue with once the block is closed.
 * @param nok State to continue with when the construct is rejected.
 * @returns The initial state, which expects the opening backslash.
 */
function tokenizeTexMathFlow(effects: Effects, ok: State, nok: State): State {
  return atBackslash;

  /** Consume the backslash that triggered this construct. */
  function atBackslash(code: Code): State | undefined {
    effects.enter("mathFlow");
    effects.enter("mathFlowFence");
    effects.enter("mathFlowFenceSequence");
    effects.consume(code);
    return expectLeftBracket;
  }

  /** The opening fence is only valid when `[` follows the backslash. */
  function expectLeftBracket(code: Code): State | undefined {
    if (code === LEFT_BRACKET) {
      effects.consume(code);
      effects.exit("mathFlowFenceSequence");
      effects.exit("mathFlowFence");
      return beforeValue;
    }

    return nok(code);
  }

  /** Between value runs: right after the opening fence or after a line ending. */
  function beforeValue(code: Code): State | undefined {
    if (code === null) {
      return nok(code);
    }
    if (isLineEnding(code)) {
      return lineBreak(code);
    }
    if (code === BACKSLASH) {
      return tryClose(code);
    }

    effects.enter("mathFlowValue");
    return inValue(code);
  }

  /** Inside a value run; the run ends at EOF, a line ending, or a backslash. */
  function inValue(code: Code): State | undefined {
    if (code === null) {
      effects.exit("mathFlowValue");
      return nok(code);
    }
    if (isLineEnding(code)) {
      effects.exit("mathFlowValue");
      return lineBreak(code);
    }
    if (code === BACKSLASH) {
      effects.exit("mathFlowValue");
      return tryClose(code);
    }

    effects.consume(code);
    return inValue;
  }

  /** Emit one `lineEnding` token and go back to looking for a value. */
  function lineBreak(code: Code): State | undefined {
    effects.enter("lineEnding");
    effects.consume(code);
    effects.exit("lineEnding");
    return beforeValue;
  }

  /** Try to read `\]` here; fall back to treating the backslash as content. */
  function tryClose(code: Code): State | undefined {
    return effects.attempt(texMathFlowClose, finish, literalBackslash)(code);
  }

  /** The close attempt failed: the backslash begins a new value run. */
  function literalBackslash(code: Code): State | undefined {
    effects.enter("mathFlowValue");
    effects.consume(code);
    return inValue;
  }

  /** The closing fence was read: close the block. */
  function finish(code: Code): State | undefined {
    effects.exit("mathFlow");
    return ok(code);
  }
}
|
||||
|
||||
/**
 * Tokenizer for the closing `\]` fence of TeX display math (used as a partial
 * construct by `tokenizeTexMathFlow`).
 *
 * It consumes a backslash and succeeds only when the next character is `]`,
 * wrapping both in `mathFlowFence` / `mathFlowFenceSequence` tokens.
 *
 * NOTE(review): the fence is accepted without looking at what follows `\]`.
 * micromark flow constructs normally finish at a line ending or end of input —
 * confirm how trailing content on the same line is handled.
 *
 * @param effects micromark effects used to open/close tokens and consume codes.
 * @param ok State returned once `\]` has been consumed.
 * @param nok State to continue with when the backslash is not followed by `]`.
 * @returns The initial state, which expects the backslash.
 */
function tokenizeTexMathFlowClose(effects: Effects, ok: State, nok: State): State {
  return atBackslash;

  /** Consume the backslash that may start the closing fence. */
  function atBackslash(code: Code): State | undefined {
    effects.enter("mathFlowFence");
    effects.enter("mathFlowFenceSequence");
    effects.consume(code);
    return expectRightBracket;
  }

  /** Only `]` completes the fence; anything else fails the attempt. */
  function expectRightBracket(code: Code): State | undefined {
    if (code === RIGHT_BRACKET) {
      effects.consume(code);
      effects.exit("mathFlowFenceSequence");
      effects.exit("mathFlowFence");
      return ok;
    }

    return nok(code);
  }
}
|
||||
@ -296,6 +296,71 @@ describe("MarkdownTextRenderer", () => {
|
||||
expect(container.querySelector(".katex")).toBeNull();
|
||||
});
|
||||
|
||||
it("renders guarded single-dollar inline math", () => {
|
||||
const { container } = render(
|
||||
<MarkdownTextRenderer>
|
||||
{
|
||||
"Variables $x$ and powers $2^n$ render inline, while a price range $10-20$ stays literal."
|
||||
}
|
||||
</MarkdownTextRenderer>,
|
||||
);
|
||||
|
||||
expect(container.querySelectorAll(".katex")).toHaveLength(2);
|
||||
expect(container).not.toHaveTextContent("$x$");
|
||||
expect(container).not.toHaveTextContent("$2^n$");
|
||||
expect(container).toHaveTextContent("$10-20$");
|
||||
});
|
||||
|
||||
it("renders model-style single-dollar formula lists", () => {
|
||||
const { container } = render(
|
||||
<MarkdownTextRenderer>
|
||||
{[
|
||||
"- Fourier transform: $\\hat{f}(\\xi) = \\int_{-\\infty}^{+\\infty} f(x)e^{-2\\pi i x \\xi}\\, dx$",
|
||||
"- Taylor expansion: $e^x = \\sum_{n=0}^{\\infty} \\frac{x^n}{n!}$",
|
||||
"- KL divergence: $D_\\text{KL}(P || Q) = \\sum_x P(x) \\log \\frac{P(x)}{Q(x)}$",
|
||||
"- Quantum state: $\\psi = \\alpha|0\\rangle + \\beta|1\\rangle$",
|
||||
].join("\n")}
|
||||
</MarkdownTextRenderer>,
|
||||
);
|
||||
|
||||
expect(container.querySelectorAll(".katex")).toHaveLength(4);
|
||||
expect(container).not.toHaveTextContent("$\\hat{f}");
|
||||
expect(container).not.toHaveTextContent("$D_\\text");
|
||||
});
|
||||
|
||||
it("renders TeX inline math delimiters", () => {
|
||||
const { container } = render(
|
||||
<MarkdownTextRenderer>{"Einstein wrote \\(E = mc^2\\) for mass-energy equivalence."}</MarkdownTextRenderer>,
|
||||
);
|
||||
|
||||
expect(container.querySelector(".katex")).toBeInTheDocument();
|
||||
expect(container.querySelector(".katex-display")).toBeNull();
|
||||
expect(container).not.toHaveTextContent("\\(");
|
||||
expect(container).not.toHaveTextContent("\\)");
|
||||
});
|
||||
|
||||
it("renders TeX display math delimiters", () => {
|
||||
const { container } = render(
|
||||
<MarkdownTextRenderer>{"\\[x^2 + y^2 = z^2\\]"}</MarkdownTextRenderer>,
|
||||
);
|
||||
|
||||
expect(container.querySelector(".katex-display")).toBeInTheDocument();
|
||||
expect(container).not.toHaveTextContent("\\[");
|
||||
expect(container).not.toHaveTextContent("\\]");
|
||||
});
|
||||
|
||||
it("keeps TeX delimiters inside code literal", () => {
|
||||
const { container } = render(
|
||||
<MarkdownTextRenderer highlightCode={false}>
|
||||
{"Inline `\\(x\\)` stays literal.\n\n```text\n\\[x^2\\]\n```"}
|
||||
</MarkdownTextRenderer>,
|
||||
);
|
||||
|
||||
expect(container.querySelector(".katex")).toBeNull();
|
||||
expect(screen.getByText("\\(x\\)").tagName).toBe("CODE");
|
||||
expect(screen.getByText("\\[x^2\\]")).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("still renders explicit math blocks", () => {
|
||||
const { container } = render(
|
||||
<MarkdownTextRenderer>{"$$x^2 + y^2 = z^2$$"}</MarkdownTextRenderer>,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user