perf(loop): parallelize tool execution with asyncio.gather

Tool calls from a single LLM response are independent by design —
the model batches them precisely because they can run concurrently.
Replace the serial for-loop with asyncio.gather so N tools complete
in max(time_i) instead of sum(time_i).

Note: asyncio.gather is used without return_exceptions=True, so it
fails fast — if any tool raises, the remaining results are discarded
and no tool results are appended to the message history, whereas the
serial loop recorded results up to the point of failure. This assumes
ToolRegistry.execute returns error strings rather than raising.

Made-with: Cursor
This commit is contained in:
xzq.xu 2026-03-18 17:05:32 +08:00 committed by chengyongru
parent e70c2ead23
commit ae04f2e3e4

View File

@ -243,11 +243,19 @@ class AgentLoop:
thinking_blocks=response.thinking_blocks,
)
for tool_call in response.tool_calls:
tools_used.append(tool_call.name)
args_str = json.dumps(tool_call.arguments, ensure_ascii=False)
logger.info("Tool call: {}({})", tool_call.name, args_str[:200])
result = await self.tools.execute(tool_call.name, tool_call.arguments)
for tc in response.tool_calls:
tools_used.append(tc.name)
args_str = json.dumps(tc.arguments, ensure_ascii=False)
logger.info("Tool call: {}({})", tc.name, args_str[:200])
# Execute all tool calls concurrently — the LLM batches
# independent calls in a single response on purpose.
results = await asyncio.gather(*(
self.tools.execute(tc.name, tc.arguments)
for tc in response.tool_calls
))
for tool_call, result in zip(response.tool_calls, results):
messages = self.context.add_tool_result(
messages, tool_call.id, tool_call.name, result
)