Fix: final_response_node calls the LLM and supports streaming output

2026-05-01 13:42:12 +08:00
parent 3051a34ce7
commit 9ed946cbe3


@@ -235,9 +235,12 @@ def error_handling_node(state: MainGraphState) -> MainGraphState:
 # ========== 3. Final response node ==========
-def final_response_node(state: MainGraphState) -> MainGraphState:
+from langchain_core.runnables.config import RunnableConfig
+from langchain_core.messages import AIMessage
+
+async def final_response_node(state: MainGraphState, config: RunnableConfig) -> MainGraphState:
     """
-    Final response node: assemble and generate the final answer
+    Final response node: call the LLM to generate the final answer (supports streaming output)
     """
     state.current_phase = "finalizing"
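
A note on the signature change above: LangGraph awaits async node functions and, when a node declares a second parameter, injects the per-invocation RunnableConfig. Forwarding that config into the nested chain.astream(...) call (as the second hunk below does) keeps the caller's callback handlers attached, which is what lets token-level streaming escape the node. A minimal runnable sketch of the pattern, with a stand-in chain so no real model is needed (the node and state names here are illustrative, not from this repo):

    import asyncio
    from langchain_core.messages import AIMessage
    from langchain_core.runnables import RunnableLambda
    from langchain_core.runnables.config import RunnableConfig

    # Stand-in for `prompt | llm`: echoes its input as a single AIMessage.
    fake_chain = RunnableLambda(lambda x: AIMessage(content=f"echo: {x}"))

    async def demo_node(state: dict, config: RunnableConfig) -> dict:
        # Forward `config` so the nested call inherits the run's callbacks;
        # dropping it would silently break token streaming for consumers.
        async for chunk in fake_chain.astream(state["query"], config=config):
            state["answer"] = chunk.content
        return state

    print(asyncio.run(demo_node({"query": "hi"}, {}))["answer"])  # -> echo: hi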
@@ -246,33 +249,78 @@ def final_response_node(state: MainGraphState) -> MainGraphState:
         state.current_phase = "done"
         return state
 
-    # Assemble the final answer
-    parts = []
+    import time
+    start_time = time.time()
 
-    # Append the RAG context (if any)
-    if state.rag_context:
-        parts.append(state.rag_context)
-        parts.append("---")
+    try:
+        # Build the LLM call chain
+        from app.agent.prompts import create_system_prompt
+        from app.model_services.chat_services import get_chat_service
+        from app.logger import debug, info
+
+        llm = get_chat_service()
+        prompt = create_system_prompt(tools=[])
+        chain = prompt | llm
 
-    # Append subgraph results (if any)
-    if state.contact_result and hasattr(state.contact_result, "get"):
-        if state.contact_result.get("final_result"):
-            parts.append(state.contact_result["final_result"])
-    if state.dictionary_result and hasattr(state.dictionary_result, "get"):
-        if state.dictionary_result.get("final_result"):
-            parts.append(state.dictionary_result["final_result"])
-    if state.news_result and hasattr(state.news_result, "get"):
-        if state.news_result.get("final_result"):
-            parts.append(state.news_result["final_result"])
+        # Build the context
+        memory_context = getattr(state, "memory_context", "No user information available")
+
+        # Add the RAG context to the messages
+        messages_with_context = list(state.messages)
+        if state.rag_context:
+            # Inject the RAG context as a system message
+            from langchain_core.messages import SystemMessage
+            rag_system_msg = SystemMessage(content=f"The following is the retrieved relevant information:\n{state.rag_context}")
+            # Insert it before the first human message
+            inserted = False
+            for i, msg in enumerate(messages_with_context):
+                if msg.type == "human":
+                    messages_with_context.insert(i, rag_system_msg)
+                    inserted = True
+                    break
+            if not inserted:
+                messages_with_context.insert(0, rag_system_msg)
+
+        # Call the LLM (streaming output)
+        chunks = []
+        async for chunk in chain.astream(
+            {
+                "messages": messages_with_context,
+                "memory_context": memory_context
+            },
+            config=config
+        ):
+            chunks.append(chunk)
 
-    # If nothing was collected, fall back to a default answer
-    if not parts:
-        parts.append(f"I understood your question: {state.user_query}")
-
-    state.final_result = "\n".join(parts)
-    state.success = True
-    state.current_phase = "done"
-    state.end_time = datetime.now().isoformat()
+        # Merge all chunks into the final AIMessage
+        if chunks:
+            response = chunks[0]
+            for chunk in chunks[1:]:
+                response = response + chunk
+        else:
+            response = AIMessage(content="")
+
+        elapsed_time = time.time() - start_time
+
+        # Update the state
+        state.messages.append(response)
+        state.final_result = response.content
+        state.success = True
+        state.current_phase = "done"
+        state.end_time = datetime.now().isoformat()
+        state.llm_calls = getattr(state, "llm_calls", 0) + 1
+        info(f"⏱️ [LLM stats] call took {elapsed_time:.2f}s")
+    except Exception as e:
+        from app.logger import error
+        import traceback
+        error(f"❌ [LLM error] call failed: {e}")
+        traceback.print_exc()
+        state.final_result = "Sorry, the model is temporarily unavailable. Please try again later."
+        state.success = False
+        state.current_phase = "done"
 
     return state
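
For reference, with the node now streaming through chain.astream under the injected config, a caller can consume token-level chunks straight from the compiled graph. A minimal consumer sketch, assuming a recent langgraph where stream_mode="messages" yields (message_chunk, metadata) pairs; the main_graph name, the initial-state fields, and the thread_id value are hypothetical:

    import asyncio

    async def stream_answer(question: str) -> str:
        parts = []
        # `main_graph` stands for the compiled StateGraph (hypothetical name).
        # stream_mode="messages" emits LLM token chunks plus per-node metadata.
        async for chunk, metadata in main_graph.astream(
            {"messages": [("user", question)], "user_query": question},
            config={"configurable": {"thread_id": "demo"}},
            stream_mode="messages",
        ):
            # Only surface tokens produced inside final_response_node.
            if metadata.get("langgraph_node") == "final_response_node" and chunk.content:
                print(chunk.content, end="", flush=True)
                parts.append(chunk.content)
        return "".join(parts)

    # asyncio.run(stream_answer("What's new today?"))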