"""Tests for the reasoning-tag helpers in ``nanobot.utils.helpers``.

Covers ``strip_think`` (removes reasoning markup from model output),
``extract_think`` (returns ``(thinking, clean_text)``) and
``extract_reasoning`` (returns ``(reasoning, content)``).
"""

# NOTE(review): this module was restored from a flattened copy in which
# tag-like markup (``<think>``, ``</think>``, ``<thought>``, ``<channel|>``)
# had been stripped from string literals and whitespace runs collapsed.
# Tags were reinserted where the test name, docstring and expected value
# determine them; every literal that could not be pinned down that way is
# marked NOTE(review) below -- confirm those against version control.

from nanobot.utils.helpers import extract_reasoning, extract_think, strip_think


class TestStripThinkTag:
    """Test <thought>...</thought> block stripping (Gemma 4 and similar models)."""

    def test_closed_tag(self):
        # NOTE(review): expected value assumes the spaces on both sides of the
        # removed block survive (two spaces) -- the flattened copy showed one.
        assert strip_think("Hello <thought>reasoning</thought> World") == "Hello  World"

    def test_unclosed_trailing_tag(self):
        assert strip_think("<thought>ongoing...") == ""

    def test_multiline_tag(self):
        assert strip_think("<thought>\nline1\nline2\n</thought>End") == "End"

    def test_tag_with_nested_angle_brackets(self):
        # Bare '<' / '>' comparison operators inside the block must not end it.
        text = "<thought>a < 3 and b > 2</thought>result"
        assert strip_think(text) == "result"

    def test_multiple_tag_blocks(self):
        text = "A<thought>x</thought>B<thought>y</thought>C"
        assert strip_think(text) == "ABC"

    def test_tag_only_whitespace_inside(self):
        assert strip_think("before<thought> </thought>after") == "beforeafter"

    def test_self_closing_tag_not_matched(self):
        assert strip_think("<thought/>some text") == "<thought/>some text"

    def test_normal_text_unchanged(self):
        assert strip_think("Just normal text") == "Just normal text"

    def test_empty_string(self):
        assert strip_think("") == ""


class TestStripThinkFalsePositive:
    """Ensure mid-content <think>/<thought> tags are NOT stripped (#3004)."""

    def test_backtick_think_tag_preserved(self):
        text = "*Think Stripping:* A new utility to strip `<think>` tags from output."
        assert strip_think(text) == text

    def test_prose_think_tag_preserved(self):
        text = "The model emits <think> at the start of its response."
        assert strip_think(text) == text

    def test_code_block_think_tag_preserved(self):
        # The opener appears only inside a fenced code sample and is unclosed.
        text = 'Example:\n```\ntext = re.sub(r"<think>[\\s\\S]*", "", text)\n```\nDone.'
        assert strip_think(text) == text

    def test_backtick_thought_tag_preserved(self):
        text = "Gemma 4 uses `<thought>` blocks for reasoning."
        assert strip_think(text) == text

    def test_prefix_unclosed_think_still_stripped(self):
        assert strip_think("<think>reasoning without closing") == ""

    def test_prefix_unclosed_think_with_whitespace(self):
        assert strip_think(" <think>reasoning...") == ""

    def test_prefix_unclosed_thought_still_stripped(self):
        assert strip_think("<thought>reasoning without closing") == ""


class TestStripThinkMalformedLeaks:
    """Regression: Gemma 4's Ollama renderer occasionally emits a tag name
    with no closing '>', running straight into the user-facing content
    (e.g. `<think喷泉策略...`). Earlier patterns required the '>' and let
    these through."""

    # NOTE(review): the flattened copy lost everything between the opening of
    # the first assertion below and the "valid tag name variant" comment. The
    # two cases that follow are best-effort reconstructions, and any further
    # cases that sat between them are missing -- restore from version control.

    def test_malformed_think_no_gt_chinese(self):
        # NOTE(review): input and expected value reconstructed from the test
        # name and the sample text used elsewhere in this class.
        assert strip_think("<think喷泉策略:09:00 开启") == "喷泉策略:09:00 开启"

    def test_longer_tag_name_not_matched(self):
        # NOTE(review): test name and tag name are reconstructed; only the
        # comment tail and the "content" literals survived.
        # `<thinker>` is a valid tag name variant; must not match.
        assert strip_think("<thinker>content</thinker>") == "<thinker>content</thinker>"

    def test_self_closing_preserved(self):
        assert strip_think("<think/>ok") == "<think/>ok"
        assert strip_think("<thought/>ok") == "<thought/>ok"

    def test_orphan_closing_think_at_end_stripped(self):
        # Typical leak: model opens `<think>` without closing; we strip the
        # opener from the start, leaving an orphan `</think>` at the end.
        assert strip_think("answer</think>") == "answer"

    def test_orphan_closing_think_at_start_stripped(self):
        assert strip_think("</think>answer") == "answer"

    def test_channel_marker_at_start_stripped(self):
        # Harmony / Gemma 4 channel markers leak at the start of a response.
        # NOTE(review): the first marker was stripped from the flattened copy;
        # `<channel|>` is assumed.
        assert strip_think("<channel|>喷泉策略:09:00 开启") == "喷泉策略:09:00 开启"
        assert strip_think("<|channel|>answer") == "answer"

    def test_partial_trailing_think_tag_after_visible_text(self):
        # NOTE(review): both partial-tag suffixes and the first expected value
        # were lost; reconstructed by analogy with the channel-marker case.
        assert strip_think("喷泉策略说明 <thin") == "喷泉策略说明"
        assert strip_think("answer <think") == "answer"

    def test_partial_trailing_channel_marker_after_visible_text(self):
        assert strip_think("喷泉策略说明 <|chan") == "喷泉策略说明"
        # NOTE(review): the trailing marker fragment in the second input was
        # stripped from the flattened copy; `<chan` is assumed.
        assert strip_think("answer <chan") == "answer"


class TestStripThinkConservativePreserve:
    """Regression: the malformed-tag / orphan cleanup must NOT touch
    legitimate prose or code that mentions these tokens literally, otherwise
    `strip_think` (which runs before history is persisted, memory.py) will
    silently rewrite the conversation transcript."""

    # NOTE(review): in the four variant tests only the word "bar" survived;
    # the tag spellings are inferred from the test names.

    def test_think_dash_variant_preserved(self):
        assert strip_think("<think-foo>bar</think-foo>") == "<think-foo>bar</think-foo>"

    def test_think_underscore_variant_preserved(self):
        assert strip_think("<think_foo>bar</think_foo>") == "<think_foo>bar</think_foo>"

    def test_think_numeric_variant_preserved(self):
        assert strip_think("<think1>bar</think1>") == "<think1>bar</think1>"

    def test_think_namespaced_variant_preserved(self):
        assert strip_think("<think:foo>bar</think:foo>") == "<think:foo>bar</think:foo>"

    def test_literal_close_think_in_prose_preserved(self):
        # Mid-prose references to `</think>` in backticks or plain text must
        # not be stripped; edge-only regex protects this.
        text = "Use `</think>` to close a thinking block."
        assert strip_think(text) == text

    def test_literal_channel_marker_in_prose_preserved(self):
        text = "The Harmony spec uses `<|channel|>` and `<channel|>` markers."
        assert strip_think(text) == text

    def test_literal_channel_marker_in_code_block_preserved(self):
        text = "Example:\n```\nif line.startswith('<channel|>'):\n    skip()\n```"
        assert strip_think(text) == text


class TestExtractThink:
    """Tests for ``extract_think``, which returns ``(thinking, clean_text)``."""

    def test_no_think_tags(self):
        thinking, clean = extract_think("Hello World")
        assert thinking is None
        assert clean == "Hello World"

    def test_single_think_block(self):
        text = "Hello <think>reasoning content\nhere</think> World"
        thinking, clean = extract_think(text)
        assert thinking == "reasoning content\nhere"
        # NOTE(review): two spaces assumed, see TestStripThinkTag.test_closed_tag.
        assert clean == "Hello  World"

    def test_single_thought_block(self):
        text = "Hello <thought>reasoning content</thought> World"
        thinking, clean = extract_think(text)
        assert thinking == "reasoning content"
        assert clean == "Hello  World"

    def test_multiple_think_blocks(self):
        text = "A<think>first</think>B<think>second</think>C"
        thinking, clean = extract_think(text)
        assert thinking == "first\n\nsecond"
        assert clean == "ABC"

    def test_think_only_no_content(self):
        text = "<think>just thinking</think>"
        thinking, clean = extract_think(text)
        assert thinking == "just thinking"
        assert clean == ""

    def test_unclosed_think_not_extracted(self):
        # Unclosed blocks at start are stripped but NOT extracted
        text = "<think>unclosed thinking..."
        thinking, clean = extract_think(text)
        assert thinking is None
        assert clean == ""

    def test_empty_think_block(self):
        text = "Hello <think></think> World"
        thinking, clean = extract_think(text)
        # Empty blocks result in empty string after strip
        assert thinking == ""
        assert clean == "Hello  World"

    def test_think_with_whitespace_only(self):
        # NOTE(review): the flattened copy shows the same markup-free literal
        # for the input and the expected clean text, so the position of the
        # original tags cannot be recovered; the literal is kept as found.
        # Restore the real input from version control.
        text = "Hello \n World"
        thinking, clean = extract_think(text)
        assert thinking is None
        assert clean == "Hello \n World"

    def test_mixed_think_and_thought(self):
        text = (
            "Start<think>first reasoning</think>middle"
            "<thought>second reasoning</thought>End"
        )
        thinking, clean = extract_think(text)
        assert thinking == "first reasoning\n\nsecond reasoning"
        assert clean == "StartmiddleEnd"

    def test_real_world_ollama_response(self):
        text = """<think>
The user is asking about Python list comprehensions.
Let me explain the syntax and give examples.
</think>

List comprehensions in Python provide a concise way to create lists. Here's the syntax:

```python
[expression for item in iterable if condition]
```

For example:
```python
squares = [x**2 for x in range(10)]
```"""
        thinking, clean = extract_think(text)
        assert "list comprehensions" in thinking.lower()
        assert "Let me explain" in thinking
        assert "List comprehensions in Python" in clean
        # Neither delimiter may leak into the visible text.
        assert "<think>" not in clean
        assert "</think>" not in clean


class TestExtractReasoning:
    """Single source of truth for reasoning extraction across all providers."""

    def test_prefers_reasoning_content_and_strips_inline_think(self):
        # Dedicated field wins; inline tags are still scrubbed from content.
        reasoning, content = extract_reasoning(
            "dedicated",
            None,
            "<think>inline</think>visible answer",
        )
        assert reasoning == "dedicated"
        assert content == "visible answer"

    def test_falls_back_to_thinking_blocks(self):
        reasoning, content = extract_reasoning(
            None,
            [
                {"type": "thinking", "thinking": "step 1"},
                {"type": "thinking", "thinking": "step 2"},
                {"type": "redacted_thinking"},
            ],
            "hello",
        )
        assert reasoning == "step 1\n\nstep 2"
        assert content == "hello"

    def test_falls_back_to_inline_think_tags(self):
        reasoning, content = extract_reasoning(
            None, None, "<think>plan</think>answer"
        )
        assert reasoning == "plan"
        assert content == "answer"

    def test_no_reasoning_returns_none(self):
        reasoning, content = extract_reasoning(None, None, "plain answer")
        assert reasoning is None
        assert content == "plain answer"

    def test_empty_thinking_blocks_falls_through_to_inline(self):
        reasoning, content = extract_reasoning(
            None, [], "<think>plan</think>answer"
        )
        assert reasoning == "plan"
        assert content == "answer"