feat: 实现真正的 LLM 流式 token 发送

2026-05-07 02:05:23 +08:00
parent 6332e30d2f
commit 6d7f8758d2
3 changed files with 176 additions and 69 deletions
--- a/backend/app/main_graph/nodes/agent.py
+++ b/backend/app/main_graph/nodes/agent.py
@@ -1,10 +1,11 @@
 """Agent 节点：核心推理与工具调用"""

 from typing import Dict, Any, Optional
-from langchain_core.messages import SystemMessage
+from langchain_core.messages import SystemMessage, AIMessage, AIMessageChunk
 from langchain_core.runnables.config import RunnableConfig
 from ..state import AgentState
-from backend.app.logger import info, warning
+from backend.app.logger import info, warning, error
+from .stream_context import token_queue_var


 # 系统提示词（从 main_graph_builder.py 搬过来）
@@ -77,23 +78,81 @@ def create_agent_node(llm_with_tools, llm):
            # 判断是否达到步数上限
            if state.current_step >= state.max_steps:
                info(f"[Agent] 达到步数上限 {state.max_steps}，强制结束，不绑定工具")
-                llm_no_tools = llm.bind_tools([])
-                response = await llm_no_tools.ainvoke(full_messages)
+                current_llm = llm.bind_tools([])
            else:
-                info(f"[Agent] 调用带工具的 LLM...")
-                response = await llm_with_tools.ainvoke(full_messages)
-            
+                current_llm = llm_with_tools
+
+            info(f"[Agent] 调用带工具的 LLM...")
+
+            # 获取 token 队列
+            token_queue = token_queue_var.get()
+
+            # 完整消息
+            full_content = ""
+            full_reasoning_content = ""
+            full_tool_calls = []
+
+            # 流式调用 LLM
+            async for chunk in current_llm.astream(full_messages):
+                if isinstance(chunk, AIMessageChunk):
+                    # 处理 content
+                    if chunk.content:
+                        full_content += chunk.content
+                        if token_queue:
+                            await token_queue.put({
+                                "type": "llm_token",
+                                "node": "agent",
+                                "token": chunk.content,
+                                "reasoning_token": ""
+                            })
+
+                    # 处理 reasoning_content
+                    if hasattr(chunk, 'additional_kwargs') and chunk.additional_kwargs:
+                        reasoning_content = chunk.additional_kwargs.get("reasoning_content", "")
+                        if reasoning_content:
+                            full_reasoning_content += reasoning_content
+                            if token_queue:
+                                await token_queue.put({
+                                    "type": "llm_token",
+                                    "node": "agent",
+                                    "token": "",
+                                    "reasoning_token": reasoning_content
+                                })
+
+                    # 处理 tool_calls
+                    if hasattr(chunk, 'tool_calls') and chunk.tool_calls:
+                        # 合并 tool_calls
+                        for tc in chunk.tool_calls:
+                            # 查找是否已经有这个 id 的 tool_call
+                            found = False
+                            for existing_tc in full_tool_calls:
+                                if existing_tc.get("id") == tc.get("id"):
+                                    # 合并 args
+                                    existing_tc["args"] = {**existing_tc.get("args", {}), **tc.get("args", {})}
+                                    found = True
+                                    break
+                            if not found:
+                                full_tool_calls.append(tc)
+
+            # 构建完整的 AIMessage
+            response = AIMessage(
+                content=full_content,
+                tool_calls=full_tool_calls if full_tool_calls else None
+            )
+            if full_reasoning_content:
+                response.additional_kwargs["reasoning_content"] = full_reasoning_content
+
            info(f"[Agent] LLM 调用成功！响应类型: {type(response).__name__}")
            if hasattr(response, 'tool_calls') and response.tool_calls:
                info(f"[Agent] 检测到工具调用: {[tc['name'] for tc in response.tool_calls]}")

-            # 返回状态更新（注意：不原地修改 state，返回字典让 LangGraph 处理
+            # 返回状态更新
            return {
                "messages": [response],
                "current_step": state.current_step + 1,
                "llm_calls": state.llm_calls + 1
            }
-        
+
        except Exception as e:
            error(f"[Agent] ❌ 第 {state.current_step} 步推理出错: {e}")
            import traceback