From ae04f2e3e4caad147941e9e84ddb986919599e3d Mon Sep 17 00:00:00 2001
From: "xzq.xu"
Date: Wed, 18 Mar 2026 17:05:32 +0800
Subject: [PATCH] perf(loop): parallelize tool execution with asyncio.gather
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tool calls from a single LLM response are independent by design — the
model batches them precisely because they can run concurrently. Replace
the serial for-loop with asyncio.gather so N tools complete in
max(time_i) instead of sum(time_i).

Made-with: Cursor
---
 nanobot/agent/loop.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
index 92c6101a7..7973b1f84 100644
--- a/nanobot/agent/loop.py
+++ b/nanobot/agent/loop.py
@@ -243,11 +243,19 @@ class AgentLoop:
                 thinking_blocks=response.thinking_blocks,
             )
 
-            for tool_call in response.tool_calls:
-                tools_used.append(tool_call.name)
-                args_str = json.dumps(tool_call.arguments, ensure_ascii=False)
-                logger.info("Tool call: {}({})", tool_call.name, args_str[:200])
-                result = await self.tools.execute(tool_call.name, tool_call.arguments)
+            for tc in response.tool_calls:
+                tools_used.append(tc.name)
+                args_str = json.dumps(tc.arguments, ensure_ascii=False)
+                logger.info("Tool call: {}({})", tc.name, args_str[:200])
+
+            # Execute all tool calls concurrently — the LLM batches
+            # independent calls in a single response on purpose.
+            results = await asyncio.gather(*(
+                self.tools.execute(tc.name, tc.arguments)
+                for tc in response.tool_calls
+            ))
+
+            for tool_call, result in zip(response.tool_calls, results):
                 messages = self.context.add_tool_result(
                     messages, tool_call.id, tool_call.name, result
                 )