From 9fefb31344792447fb7db49df59e2b980c1221e6 Mon Sep 17 00:00:00 2001
From: chengyongru <chengyongru.ai@gmail.com>
Date: Thu, 7 May 2026 18:01:58 +0800
Subject: [PATCH] fix(weixin): treat ret=-2 as non-fatal on sendmessage and
 align client_id format

The iLink sendmessage API frequently returns ret=-2 (parameter error / rate
limit / expired token) even when HTTP status is 200.  The openclaw reference
plugin ignores the JSON body for sendmessage entirely and only checks HTTP
status.  Our previous strict ret checking turned ret=-2 into RuntimeError,
causing ChannelManager retries which only made things worse.

Changes:
- _send_text: swallow ret=-2 after one retry without context_token.
  Log request body + response at warning level for diagnostics.
- _send_media_file: swallow ret=-2 too (warning logged; no token-less retry).
- _generate_client_id: change format to ``nanobot:{timestamp}-{hex}`` to
  match openclaw-weixin ``{prefix}:{Date.now()}-{hex}``.
- Update tests to expect swallowing instead of raising for ret=-2.
---
 nanobot/channels/weixin.py            | 80 +++++++++++++++++++--------
 tests/channels/test_weixin_channel.py | 16 +++---
 2 files changed, 64 insertions(+), 32 deletions(-)

diff --git a/nanobot/channels/weixin.py b/nanobot/channels/weixin.py
index 77f22b30c..d15d09267 100644
--- a/nanobot/channels/weixin.py
+++ b/nanobot/channels/weixin.py
@@ -18,7 +18,6 @@ import os
 import random
 import re
 import time
-import uuid
 from collections import OrderedDict
 from contextlib import suppress
 from pathlib import Path
@@ -1109,6 +1108,14 @@ class WeixinChannel(BaseChannel):
         except Exception as e:
             self.logger.debug("typing clear failed for {}: {}", chat_id, e)
 
+    @staticmethod
+    def _generate_client_id() -> str:
+        """Return a client_id shaped like openclaw-weixin's ``{prefix}:{timestamp}-{hex}``.
+
+        Here: ``nanobot:`` + epoch milliseconds + ``-`` + 8 hex chars (4 random bytes).
+        """
+        return f"nanobot:{int(time.time() * 1000)}-{os.urandom(4).hex()}"
+
     async def _send_text(
         self,
         to_user_id: str,
@@ -1116,7 +1123,7 @@ class WeixinChannel(BaseChannel):
         context_token: str,
     ) -> None:
         """Send a text message matching the exact protocol from send.ts."""
-        client_id = f"nanobot-{uuid.uuid4().hex[:12]}"
+        client_id = self._generate_client_id()
 
         item_list: list[dict] = []
         if text:
@@ -1143,32 +1150,48 @@ class WeixinChannel(BaseChannel):
         ret = data.get("ret", 0)
         errcode = data.get("errcode", 0)
 
-        # If ret=-2 (parameter error / rate limit / expired token) and we sent
-        # with a context_token, retry once without it.  The openclaw reference
-        # plugin can send without a token (it just warns), and issue #61174
-        # shows that expired context_tokens are a common cause of ret=-2.
-        if ret == -2 and context_token:
+        # The iLink sendmessage API frequently returns ret=-2 (parameter
+        # error / rate limit / expired token) even though HTTP status is 200.
+        # The openclaw reference plugin ignores the JSON body for sendmessage
+        # and only checks HTTP status.  We retry once without context_token
+        # (a common fix for token expiry per openclaw#61174); if that also
+        # fails we log and swallow ret=-2 so ChannelManager does not retry.
+        if ret == -2:
+            if context_token:
+                self.logger.warning(
+                    "WeChat send text returned ret=-2 for {} (client_id={}); "
+                    "retrying without context_token",
+                    to_user_id,
+                    client_id,
+                )
+                body_no_ctx = copy.deepcopy(body)
+                body_no_ctx["msg"].pop("context_token", None)
+                data = await self._api_post("ilink/bot/sendmessage", body_no_ctx)
+                ret = data.get("ret", 0)
+                errcode = data.get("errcode", 0)
+                if ret == 0 and (errcode == 0 or errcode is None):
+                    self.logger.warning(
+                        "WeChat send text succeeded WITHOUT context_token for {}; "
+                        "clearing expired token from cache",
+                        to_user_id,
+                    )
+                    self._context_tokens.pop(to_user_id, None)
+                    self._save_state()
+                    self.logger.debug(
+                        "WeChat text sent to {} (client_id={})", to_user_id, client_id
+                    )
+                    return
+            # Treat persistent ret=-2 as non-fatal (matching reference plugin).
             self.logger.warning(
                 "WeChat send text returned ret=-2 for {} (client_id={}); "
-                "retrying without context_token",
+                "treating as non-fatal. Request body: {}. Response: {}",
                 to_user_id,
                 client_id,
+                json.dumps(body, ensure_ascii=False, default=str),
+                json.dumps(data, ensure_ascii=False, default=str),
             )
-            body_no_ctx = copy.deepcopy(body)
-            body_no_ctx["msg"].pop("context_token", None)
-            data = await self._api_post("ilink/bot/sendmessage", body_no_ctx)
-            ret = data.get("ret", 0)
-            errcode = data.get("errcode", 0)
-            if ret == 0 and (errcode == 0 or errcode is None):
-                self.logger.warning(
-                    "WeChat send text succeeded WITHOUT context_token for {}; "
-                    "clearing expired token from cache",
-                    to_user_id,
-                )
-                self._context_tokens.pop(to_user_id, None)
-                self._save_state()
-                self.logger.debug("WeChat text sent to {} (client_id={})", to_user_id, client_id)
-                return
+            self.logger.debug("WeChat text sent to {} (client_id={})", to_user_id, client_id)
+            return
 
         self._check_response_error(data, "send text", body=body)
         self.logger.debug("WeChat text sent to {} (client_id={})", to_user_id, client_id)
@@ -1297,7 +1320,7 @@ class WeixinChannel(BaseChannel):
             media_item["len"] = str(raw_size)
 
         # Send each media item as its own message (matching reference plugin)
-        client_id = f"nanobot-{uuid.uuid4().hex[:12]}"
+        client_id = self._generate_client_id()
         item_list: list[dict] = [{"type": item_type, item_key: media_item}]
 
         weixin_msg: dict[str, Any] = {
@@ -1317,6 +1340,15 @@ class WeixinChannel(BaseChannel):
         }
 
         data = await self._api_post("ilink/bot/sendmessage", body)
+        ret = data.get("ret", 0)
+        if ret == -2:
+            # See _send_text for rationale: openclaw ignores ret on sendmessage.
+            self.logger.warning(
+                "WeChat send media returned ret=-2 for {} (client_id={}); treating as non-fatal",
+                to_user_id,
+                client_id,
+            )
+            return
         self._check_response_error(data, "send media", body=body)
 
 
diff --git a/tests/channels/test_weixin_channel.py b/tests/channels/test_weixin_channel.py
index e5218272c..d903d7d18 100644
--- a/tests/channels/test_weixin_channel.py
+++ b/tests/channels/test_weixin_channel.py
@@ -1394,8 +1394,8 @@ async def test_send_text_retries_without_context_token_on_ret_minus_two() -> Non
 
 
 @pytest.mark.asyncio
-async def test_send_text_raises_when_ret_minus_two_retry_also_fails() -> None:
-    """If both attempts (with and without token) return ret=-2, raise."""
+async def test_send_text_swallows_ret_minus_two_when_retry_also_fails() -> None:
+    """If both attempts return ret=-2, swallow the error (matching openclaw)."""
     channel, _bus = _make_channel()
     channel._client = object()
     channel._token = "token"
@@ -1408,8 +1408,8 @@ async def test_send_text_raises_when_ret_minus_two_retry_also_fails() -> None:
         ]
     )
 
-    with pytest.raises(RuntimeError, match="ret=-2"):
-        await channel._send_text("wx-user", "hello", "bad-token")
+    # Should NOT raise
+    await channel._send_text("wx-user", "hello", "bad-token")
 
     assert channel._api_post.await_count == 2
     # Token is NOT cleared because retry also failed
@@ -1417,16 +1417,16 @@ async def test_send_text_raises_when_ret_minus_two_retry_also_fails() -> None:
 
 
 @pytest.mark.asyncio
-async def test_send_text_does_not_retry_without_token_when_no_context_token() -> None:
-    """If no context_token was provided, ret=-2 should raise immediately."""
+async def test_send_text_swallows_ret_minus_two_when_no_context_token() -> None:
+    """If no context_token was provided, ret=-2 is swallowed without retry."""
     channel, _bus = _make_channel()
     channel._client = object()
     channel._token = "token"
 
     channel._api_post = AsyncMock(return_value={"ret": -2})
 
-    with pytest.raises(RuntimeError, match="ret=-2"):
-        await channel._send_text("wx-user", "hello", "")
+    # Should NOT raise
+    await channel._send_text("wx-user", "hello", "")
 
     # Only one API call (no retry)
     channel._api_post.assert_awaited_once()