Fix: final_response_node calls the LLM and supports streaming output

2026-05-01 13:42:12 +08:00
parent 3051a34ce7
commit 9ed946cbe3


@@ -235,9 +235,12 @@ def error_handling_node(state: MainGraphState) -> MainGraphState:
 # ========== 3. Final response node ==========
-def final_response_node(state: MainGraphState) -> MainGraphState:
+from langchain_core.runnables.config import RunnableConfig
+from langchain_core.messages import AIMessage
+
+async def final_response_node(state: MainGraphState, config: RunnableConfig) -> MainGraphState:
     """
-    Final response node: assemble and generate the final answer
+    Final response node: call the LLM to generate the final answer (supports streaming output)
     """
     state.current_phase = "finalizing"
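
A note on the signature change above: LangGraph awaits async node functions and, when a node declares a second parameter, injects the per-invocation RunnableConfig. Forwarding that config into the nested chain.astream(...) call (as the second hunk below does) keeps the caller's callback handlers attached, which is what lets token-level streaming escape the node. A minimal runnable sketch of the pattern, with a stand-in chain so no real model is needed (the node and state names here are illustrative, not from this repo):

    import asyncio
    from langchain_core.messages import AIMessage
    from langchain_core.runnables import RunnableLambda
    from langchain_core.runnables.config import RunnableConfig

    # Stand-in for `prompt | llm`: echoes its input as a single AIMessage.
    fake_chain = RunnableLambda(lambda x: AIMessage(content=f"echo: {x}"))

    async def demo_node(state: dict, config: RunnableConfig) -> dict:
        # Forward `config` so the nested call inherits the run's callbacks;
        # dropping it would silently break token streaming for consumers.
        async for chunk in fake_chain.astream(state["query"], config=config):
            state["answer"] = chunk.content
        return state

    print(asyncio.run(demo_node({"query": "hi"}, {}))["answer"])  # -> echo: hi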
@@ -246,33 +249,78 @@ def final_response_node(state: MainGraphState) -> MainGraphState:
         state.current_phase = "done"
         return state
 
-    # Assemble the final answer
-    parts = []
+    import time
+    start_time = time.time()
 
-    # Append the RAG context (if any)
-    if state.rag_context:
-        parts.append(state.rag_context)
-        parts.append("---")
+    try:
+        # Build the LLM call chain
+        from app.agent.prompts import create_system_prompt
+        from app.model_services.chat_services import get_chat_service
+        from app.logger import debug, info
+
+        llm = get_chat_service()
+        prompt = create_system_prompt(tools=[])
+        chain = prompt | llm
 
-    # Append subgraph results (if any)
-    if state.contact_result and hasattr(state.contact_result, "get"):
-        if state.contact_result.get("final_result"):
-            parts.append(state.contact_result["final_result"])
-    if state.dictionary_result and hasattr(state.dictionary_result, "get"):
-        if state.dictionary_result.get("final_result"):
-            parts.append(state.dictionary_result["final_result"])
-    if state.news_result and hasattr(state.news_result, "get"):
-        if state.news_result.get("final_result"):
-            parts.append(state.news_result["final_result"])
+        # Build the context
+        memory_context = getattr(state, "memory_context", "No user information available")
+
+        # Add the RAG context to the messages
+        messages_with_context = list(state.messages)
+        if state.rag_context:
+            # Inject the RAG context as a system message
+            from langchain_core.messages import SystemMessage
+            rag_system_msg = SystemMessage(content=f"The following is the retrieved relevant information:\n{state.rag_context}")
+            # Insert it before the first human message
+            inserted = False
+            for i, msg in enumerate(messages_with_context):
+                if msg.type == "human":
+                    messages_with_context.insert(i, rag_system_msg)
+                    inserted = True
+                    break
+            if not inserted:
+                messages_with_context.insert(0, rag_system_msg)
+
+        # Call the LLM (streaming output)
+        chunks = []
+        async for chunk in chain.astream(
+            {
+                "messages": messages_with_context,
+                "memory_context": memory_context
+            },
+            config=config
+        ):
+            chunks.append(chunk)
 
-    # If nothing was collected, fall back to a default answer
-    if not parts:
-        parts.append(f"I understood your question: {state.user_query}")
-
-    state.final_result = "\n".join(parts)
-    state.success = True
-    state.current_phase = "done"
-    state.end_time = datetime.now().isoformat()
+        # Merge all chunks into the final AIMessage
+        if chunks:
+            response = chunks[0]
+            for chunk in chunks[1:]:
+                response = response + chunk
+        else:
+            response = AIMessage(content="")
+
+        elapsed_time = time.time() - start_time
+
+        # Update the state
+        state.messages.append(response)
+        state.final_result = response.content
+        state.success = True
+        state.current_phase = "done"
+        state.end_time = datetime.now().isoformat()
+        state.llm_calls = getattr(state, "llm_calls", 0) + 1
+        info(f"⏱️ [LLM stats] call took {elapsed_time:.2f}s")
+    except Exception as e:
+        from app.logger import error
+        import traceback
+        error(f"❌ [LLM error] call failed: {e}")
+        traceback.print_exc()
+        state.final_result = "Sorry, the model is temporarily unavailable. Please try again later."
+        state.success = False
+        state.current_phase = "done"
 
     return state
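
For reference, with the node now streaming through chain.astream under the injected config, a caller can consume token-level chunks straight from the compiled graph. A minimal consumer sketch, assuming a recent langgraph where stream_mode="messages" yields (message_chunk, metadata) pairs; the main_graph name, the initial-state fields, and the thread_id value are hypothetical:

    import asyncio

    async def stream_answer(question: str) -> str:
        parts = []
        # `main_graph` stands for the compiled StateGraph (hypothetical name).
        # stream_mode="messages" emits LLM token chunks plus per-node metadata.
        async for chunk, metadata in main_graph.astream(
            {"messages": [("user", question)], "user_query": question},
            config={"configurable": {"thread_id": "demo"}},
            stream_mode="messages",
        ):
            # Only surface tokens produced inside final_response_node.
            if metadata.get("langgraph_node") == "final_response_node" and chunk.content:
                print(chunk.content, end="", flush=True)
                parts.append(chunk.content)
        return "".join(parts)

    # asyncio.run(stream_answer("What's new today?"))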