From 9829cf66d2530d3eb41722cf29404824557fa589 Mon Sep 17 00:00:00 2001
From: Xubin Ren <xubinrencs@gmail.com>
Date: Wed, 13 May 2026 07:20:36 +0000
Subject: [PATCH] fix(webui): keep late reasoning attached above the answer

Some providers only surface structured `reasoning_content` after answer
text has already streamed. The WebUI was treating those late
`reasoning_delta` frames as a fresh assistant placeholder, so the
Thinking bubble rendered below the already-visible answer.

Attach late reasoning back to the active assistant turn instead. The
bubble still renders above the message content, preserving the expected
Thinking -> answer order even when the provider protocol delivers the
reasoning post-hoc. Add a regression test for an answer `delta` and
`stream_end` followed by `reasoning_delta`/`reasoning_end`.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 webui/src/hooks/useNanobotStream.ts       | 16 +++++++++----
 webui/src/tests/useNanobotStream.test.tsx | 29 +++++++++++++++++++++++
 2 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/webui/src/hooks/useNanobotStream.ts b/webui/src/hooks/useNanobotStream.ts
index 60736b393..8e83b9eb2 100644
--- a/webui/src/hooks/useNanobotStream.ts
+++ b/webui/src/hooks/useNanobotStream.ts
@@ -21,17 +21,23 @@ interface StreamBuffer {
 /**
  * Append a reasoning chunk to the last open reasoning stream in ``prev``.
  *
- * Lookup rule: find the most recent assistant turn that is either still
- * streaming reasoning (``reasoningStreaming``) or has no answer text yet.
- * Anything else starts a fresh streaming placeholder so a new turn's
- * reasoning never bleeds into the previous answer.
+ * Lookup rule: prefer the most recent assistant turn in the active UI tail.
+ * Most providers emit reasoning before answer text, but some only expose
+ * ``reasoning_content`` after the answer stream completes. In that post-hoc
+ * case the reasoning still belongs to the same assistant turn and must render
+ * above the answer, not as a new row below it.
  */
 function attachReasoningChunk(prev: UIMessage[], chunk: string): UIMessage[] {
   for (let i = prev.length - 1; i >= 0; i -= 1) {
     const candidate = prev[i];
     if (candidate.role !== "assistant" || candidate.kind === "trace") continue;
     const hasAnswer = candidate.content.length > 0;
-    if (candidate.reasoningStreaming || (!hasAnswer && candidate.reasoning !== undefined)) {
+    if (
+      candidate.reasoningStreaming
+      || candidate.reasoning !== undefined
+      || hasAnswer
+      || candidate.isStreaming
+    ) {
       const merged: UIMessage = {
         ...candidate,
         reasoning: (candidate.reasoning ?? "") + chunk,
diff --git a/webui/src/tests/useNanobotStream.test.tsx b/webui/src/tests/useNanobotStream.test.tsx
index 145d36c1c..f621437fd 100644
--- a/webui/src/tests/useNanobotStream.test.tsx
+++ b/webui/src/tests/useNanobotStream.test.tsx
@@ -209,6 +209,35 @@ describe("useNanobotStream", () => {
     expect(result.current.messages[0].reasoningStreaming).toBe(false);
   });
 
+  it("attaches post-hoc reasoning to the same assistant turn above the answer", () => {
+    const fake = fakeClient();
+    const { result } = renderHook(() => useNanobotStream("chat-r5", EMPTY_MESSAGES), {
+      wrapper: wrap(fake.client),
+    });
+
+    act(() => {
+      fake.emit("chat-r5", {
+        event: "delta",
+        chat_id: "chat-r5",
+        text: "hi~",
+      });
+      fake.emit("chat-r5", { event: "stream_end", chat_id: "chat-r5" });
+      fake.emit("chat-r5", {
+        event: "reasoning_delta",
+        chat_id: "chat-r5",
+        text: "This reasoning arrived after the answer stream.",
+      });
+      fake.emit("chat-r5", { event: "reasoning_end", chat_id: "chat-r5" });
+    });
+
+    expect(result.current.messages).toHaveLength(1);
+    expect(result.current.messages[0].content).toBe("hi~");
+    expect(result.current.messages[0].reasoning).toBe(
+      "This reasoning arrived after the answer stream.",
+    );
+    expect(result.current.messages[0].reasoningStreaming).toBe(false);
+  });
+
   it("attaches assistant media_urls to complete messages", () => {
     const fake = fakeClient();
     const { result } = renderHook(() => useNanobotStream("chat-m", EMPTY_MESSAGES), {