mirror of
https://github.com/HKUDS/nanobot.git
synced 2026-05-20 00:22:31 +00:00
fix(weixin): retry send without expired context_token on ret=-2
When the iLink API returns ret=-2 (parameter error), the cause is often an expired context_token rather than a malformed payload. After a gateway restart, the cached token can go stale within ~90 seconds if no new inbound message refreshes it, causing all outbound replies to fail silently.

Changes:
- _send_text: retry once without context_token when ret=-2 and a token was present; if the retry succeeds, clear the expired token from the cache.
- Remove the leftover @staticmethod on _check_response_error so that self.logger and the body parameter work correctly.
- Bump WEIXIN_CHANNEL_VERSION from 2.1.1 to 2.1.7 to match the reference openclaw-weixin plugin.
- Add tests covering the ret=-2 retry path, the failure path, and the no-token path.

References:
- openclaw/openclaw#61174 (context_token expiry after long agent turns)
- hermes-agent#21011 (ret=-2 rate limiting / parameter error)
This commit is contained in:
parent
e9f4a868a8
commit
28358980ed
@ -11,6 +11,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import base64
|
import base64
|
||||||
|
import copy
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
@ -54,7 +55,7 @@ MESSAGE_TYPE_BOT = 2
|
|||||||
MESSAGE_STATE_FINISH = 2
|
MESSAGE_STATE_FINISH = 2
|
||||||
|
|
||||||
WEIXIN_MAX_MESSAGE_LEN = 4000
|
WEIXIN_MAX_MESSAGE_LEN = 4000
|
||||||
WEIXIN_CHANNEL_VERSION = "2.1.1"
|
WEIXIN_CHANNEL_VERSION = "2.1.7"
|
||||||
ILINK_APP_ID = "bot"
|
ILINK_APP_ID = "bot"
|
||||||
|
|
||||||
|
|
||||||
@ -526,8 +527,7 @@ class WeixinChannel(BaseChannel):
|
|||||||
f"WeChat session paused, {remaining_min} min remaining (errcode {ERRCODE_SESSION_EXPIRED})"
|
f"WeChat session paused, {remaining_min} min remaining (errcode {ERRCODE_SESSION_EXPIRED})"
|
||||||
)
|
)
|
||||||
|
|
||||||
@staticmethod
|
def _check_response_error(self, data: dict, operation: str, *, body: dict | None = None) -> None:
|
||||||
def _check_response_error(data: dict, operation: str) -> None:
|
|
||||||
"""Check both ``ret`` and ``errcode`` like the reference TS code.
|
"""Check both ``ret`` and ``errcode`` like the reference TS code.
|
||||||
|
|
||||||
The iLink API may signal failure through either field (or both).
|
The iLink API may signal failure through either field (or both).
|
||||||
@ -537,10 +537,11 @@ class WeixinChannel(BaseChannel):
|
|||||||
ret = data.get("ret", 0)
|
ret = data.get("ret", 0)
|
||||||
errcode = data.get("errcode", 0)
|
errcode = data.get("errcode", 0)
|
||||||
is_error = (ret is not None and ret != 0) or (errcode is not None and errcode != 0)
|
is_error = (ret is not None and ret != 0) or (errcode is not None and errcode != 0)
|
||||||
if is_error:
|
if not is_error:
|
||||||
raise RuntimeError(
|
return
|
||||||
f"WeChat {operation} error (ret={ret}, errcode={errcode}): {data.get('errmsg', '')}"
|
raise RuntimeError(
|
||||||
)
|
f"WeChat {operation} error (ret={ret}, errcode={errcode}): {data.get('errmsg', '')}"
|
||||||
|
)
|
||||||
|
|
||||||
async def _poll_once(self) -> None:
|
async def _poll_once(self) -> None:
|
||||||
remaining = self._session_pause_remaining_s()
|
remaining = self._session_pause_remaining_s()
|
||||||
@ -1139,7 +1140,37 @@ class WeixinChannel(BaseChannel):
|
|||||||
}
|
}
|
||||||
|
|
||||||
data = await self._api_post("ilink/bot/sendmessage", body)
|
data = await self._api_post("ilink/bot/sendmessage", body)
|
||||||
self._check_response_error(data, "send text")
|
ret = data.get("ret", 0)
|
||||||
|
errcode = data.get("errcode", 0)
|
||||||
|
|
||||||
|
# If ret=-2 (parameter error / rate limit / expired token) and we sent
|
||||||
|
# with a context_token, retry once without it. The openclaw reference
|
||||||
|
# plugin can send without a token (it just warns), and issue #61174
|
||||||
|
# shows that expired context_tokens are a common cause of ret=-2.
|
||||||
|
if ret == -2 and context_token:
|
||||||
|
self.logger.warning(
|
||||||
|
"WeChat send text returned ret=-2 for {} (client_id={}); "
|
||||||
|
"retrying without context_token",
|
||||||
|
to_user_id,
|
||||||
|
client_id,
|
||||||
|
)
|
||||||
|
body_no_ctx = copy.deepcopy(body)
|
||||||
|
body_no_ctx["msg"].pop("context_token", None)
|
||||||
|
data = await self._api_post("ilink/bot/sendmessage", body_no_ctx)
|
||||||
|
ret = data.get("ret", 0)
|
||||||
|
errcode = data.get("errcode", 0)
|
||||||
|
if ret == 0 and (errcode == 0 or errcode is None):
|
||||||
|
self.logger.warning(
|
||||||
|
"WeChat send text succeeded WITHOUT context_token for {}; "
|
||||||
|
"clearing expired token from cache",
|
||||||
|
to_user_id,
|
||||||
|
)
|
||||||
|
self._context_tokens.pop(to_user_id, None)
|
||||||
|
self._save_state()
|
||||||
|
self.logger.debug("WeChat text sent to {} (client_id={})", to_user_id, client_id)
|
||||||
|
return
|
||||||
|
|
||||||
|
self._check_response_error(data, "send text", body=body)
|
||||||
self.logger.debug("WeChat text sent to {} (client_id={})", to_user_id, client_id)
|
self.logger.debug("WeChat text sent to {} (client_id={})", to_user_id, client_id)
|
||||||
|
|
||||||
async def _send_media_file(
|
async def _send_media_file(
|
||||||
@ -1286,7 +1317,7 @@ class WeixinChannel(BaseChannel):
|
|||||||
}
|
}
|
||||||
|
|
||||||
data = await self._api_post("ilink/bot/sendmessage", body)
|
data = await self._api_post("ilink/bot/sendmessage", body)
|
||||||
self._check_response_error(data, "send media")
|
self._check_response_error(data, "send media", body=body)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
@ -48,11 +48,11 @@ def test_make_headers_includes_route_tag_when_configured() -> None:
|
|||||||
assert headers["Authorization"] == "Bearer token"
|
assert headers["Authorization"] == "Bearer token"
|
||||||
assert headers["SKRouteTag"] == "123"
|
assert headers["SKRouteTag"] == "123"
|
||||||
assert headers["iLink-App-Id"] == "bot"
|
assert headers["iLink-App-Id"] == "bot"
|
||||||
assert headers["iLink-App-ClientVersion"] == str((2 << 16) | (1 << 8) | 1)
|
assert headers["iLink-App-ClientVersion"] == str((2 << 16) | (1 << 8) | 7)
|
||||||
|
|
||||||
|
|
||||||
def test_channel_version_matches_reference_plugin_version() -> None:
|
def test_channel_version_matches_reference_plugin_version() -> None:
|
||||||
assert WEIXIN_CHANNEL_VERSION == "2.1.1"
|
assert WEIXIN_CHANNEL_VERSION == "2.1.7"
|
||||||
|
|
||||||
|
|
||||||
def test_save_and_load_state_persists_context_tokens(tmp_path) -> None:
|
def test_save_and_load_state_persists_context_tokens(tmp_path) -> None:
|
||||||
@ -1362,3 +1362,71 @@ async def test_poll_loop_logs_exception_and_continues_on_poll_failure(monkeypatc
|
|||||||
|
|
||||||
assert call_count == 2
|
assert call_count == 2
|
||||||
assert any("WeChat poll loop error" in m for m in logged_messages)
|
assert any("WeChat poll loop error" in m for m in logged_messages)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_send_text_retries_without_context_token_on_ret_minus_two() -> None:
|
||||||
|
"""If sendmessage returns ret=-2 with a context_token, retry without it."""
|
||||||
|
channel, _bus = _make_channel()
|
||||||
|
channel._client = object()
|
||||||
|
channel._token = "token"
|
||||||
|
channel._context_tokens["wx-user"] = "expired-token"
|
||||||
|
|
||||||
|
channel._api_post = AsyncMock(
|
||||||
|
side_effect=[
|
||||||
|
{"ret": -2}, # first attempt with token fails
|
||||||
|
{"ret": 0}, # retry without token succeeds
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
await channel._send_text("wx-user", "hello", "expired-token")
|
||||||
|
|
||||||
|
# Should have called API twice
|
||||||
|
assert channel._api_post.await_count == 2
|
||||||
|
# First call includes context_token
|
||||||
|
first_body = channel._api_post.await_args_list[0].args[1]
|
||||||
|
assert first_body["msg"]["context_token"] == "expired-token"
|
||||||
|
# Second call does NOT include context_token
|
||||||
|
second_body = channel._api_post.await_args_list[1].args[1]
|
||||||
|
assert "context_token" not in second_body["msg"]
|
||||||
|
# Expired token should be cleared from cache
|
||||||
|
assert "wx-user" not in channel._context_tokens
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_send_text_raises_when_ret_minus_two_retry_also_fails() -> None:
|
||||||
|
"""If both attempts (with and without token) return ret=-2, raise."""
|
||||||
|
channel, _bus = _make_channel()
|
||||||
|
channel._client = object()
|
||||||
|
channel._token = "token"
|
||||||
|
channel._context_tokens["wx-user"] = "bad-token"
|
||||||
|
|
||||||
|
channel._api_post = AsyncMock(
|
||||||
|
side_effect=[
|
||||||
|
{"ret": -2}, # with token
|
||||||
|
{"ret": -2}, # without token
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(RuntimeError, match="ret=-2"):
|
||||||
|
await channel._send_text("wx-user", "hello", "bad-token")
|
||||||
|
|
||||||
|
assert channel._api_post.await_count == 2
|
||||||
|
# Token is NOT cleared because retry also failed
|
||||||
|
assert channel._context_tokens.get("wx-user") == "bad-token"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_send_text_does_not_retry_without_token_when_no_context_token() -> None:
|
||||||
|
"""If no context_token was provided, ret=-2 should raise immediately."""
|
||||||
|
channel, _bus = _make_channel()
|
||||||
|
channel._client = object()
|
||||||
|
channel._token = "token"
|
||||||
|
|
||||||
|
channel._api_post = AsyncMock(return_value={"ret": -2})
|
||||||
|
|
||||||
|
with pytest.raises(RuntimeError, match="ret=-2"):
|
||||||
|
await channel._send_text("wx-user", "hello", "")
|
||||||
|
|
||||||
|
# Only one API call (no retry)
|
||||||
|
channel._api_post.assert_awaited_once()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user