Add long-term storage, streaming check

2026-04-17 01:26:05 +08:00
parent 602d551fd1
commit 404efde282
37 changed files with 794 additions and 2095 deletions


@@ -32,15 +32,19 @@ def create_llm_call_node(llm: BaseLLM, tools: list):
     # Build the invocation chain
     prompt = create_system_prompt()
     llm_with_tools = llm.bind_tools(tools)
-    chain = prompt | RunnableLambda(print_llm_input) | llm_with_tools
-    async def call_llm(state: MessagesState, runtime: Runtime[GraphContext]) -> Dict[str, Any]:
+    # Reverted: the RunnableLambda is dropped from the chain; astream below iterates instead
+    chain = prompt | llm_with_tools
+    from langchain_core.runnables.config import RunnableConfig
+    async def call_llm(state: MessagesState, config: RunnableConfig) -> Dict[str, Any]:
         """
         LLM call node (async)

         Args:
             state: the current conversation state
-            runtime: the LangGraph runtime context
+            config: config auto-injected by LangChain/LangGraph, carrying callbacks etc.

         Returns:
             the updated state dict
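The signature change above is what enables the streaming rework in the next hunk: when a LangGraph node declares a parameter annotated with RunnableConfig, the framework injects the active run's config, and forwarding it into any runnable keeps that run's callbacks attached so token events can surface. A minimal sketch of the pattern under assumed names (my_node and the RunnableLambda stand-in chain are hypothetical, not from this repo):

import asyncio
from typing import Any, Dict
from langchain_core.runnables import RunnableLambda
from langchain_core.runnables.config import RunnableConfig

# Hypothetical stand-in for `prompt | llm_with_tools`.
some_chain = RunnableLambda(lambda x: f"echo: {x['text']}")

async def my_node(state: dict, config: RunnableConfig) -> Dict[str, Any]:
    # LangGraph injects `config` automatically because of the annotation;
    # forwarding it keeps callbacks (tracing, token streaming) attached.
    result = await some_chain.ainvoke({"text": state["text"]}, config=config)
    return {"text": result}

print(asyncio.run(my_node({"text": "hi"}, config={})))  # {'text': 'echo: hi'}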
@@ -48,17 +52,28 @@ def create_llm_call_node(llm: BaseLLM, tools: list):
         log_state_change("llm_call", state, "enter")
         memory_context = state.get("memory_context", "No user information yet")
-        loop = asyncio.get_event_loop()
         start_time = time.time()
         try:
-            response = await loop.run_in_executor(
-                None,
-                lambda: chain.invoke({
+            # Reverted to manually astream-ing and stitching all chunks into the final response.
+            # LangGraph automatically listens for every token produced along the way.
+            chunks = []
+            async for chunk in chain.astream(
+                {
                     "messages": state["messages"],
                     "memory_context": memory_context
-                })
-            )
+                },
+                config=config
+            ):
+                chunks.append(chunk)
+            # Merge all chunks into the final AIMessage
+            if chunks:
+                response = chunks[0]
+                for chunk in chunks[1:]:
+                    response = response + chunk
+            else:
+                response = AIMessage(content="")
         elapsed_time = time.time() - start_time
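To see the payoff of the astream-plus-config pattern from the caller's side, here is a hedged, self-contained sketch: it wires one node into a MessagesState graph and reads tokens with stream_mode="messages". GenericFakeChatModel stands in for the real LLM; none of these names come from this repo, and the sketch assumes langgraph and langchain-core are installed:

import asyncio
from langchain_core.language_models.fake_chat_models import GenericFakeChatModel
from langchain_core.messages import AIMessage
from langgraph.graph import StateGraph, MessagesState, START, END

# Fake model that streams "hello world" token by token.
model = GenericFakeChatModel(messages=iter([AIMessage(content="hello world")]))

async def llm_call(state: MessagesState, config):
    # Same pattern as the diff: astream with the injected config, merge chunks.
    chunks = [chunk async for chunk in model.astream(state["messages"], config=config)]
    response = chunks[0]
    for c in chunks[1:]:
        response = response + c
    return {"messages": [response]}

builder = StateGraph(MessagesState)
builder.add_node("llm_call", llm_call)
builder.add_edge(START, "llm_call")
builder.add_edge("llm_call", END)
graph = builder.compile()

async def main():
    # stream_mode="messages" yields (message_chunk, metadata) pairs as the
    # node's model produces them, not only after the node returns.
    async for chunk, metadata in graph.astream(
        {"messages": [("user", "hi")]}, stream_mode="messages"
    ):
        print(chunk.content, end="|")

asyncio.run(main())

The per-chunk `response + chunk` merge works because AIMessageChunk defines __add__, concatenating content and merging tool-call fragments into one message.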
@@ -85,13 +100,7 @@ def create_llm_call_node(llm: BaseLLM, tools: list):
         if token_usage:
             input_tokens = token_usage.get('prompt_tokens', token_usage.get('input_tokens', 0))
             output_tokens = token_usage.get('completion_tokens', token_usage.get('output_tokens', 0))
-        # Print response statistics
-        info(f"⏱️ [LLM stats] Call duration: {elapsed_time:.2f}")
-        info(f"📊 [LLM stats] Token usage: input={input_tokens}, output={output_tokens}, total={input_tokens + output_tokens}")
-        if token_usage:
-            debug(f"📋 [LLM stats] Detailed usage: {token_usage}")
         # Print the LLM's full output
         debug("\n" + "="*80)
         debug("📥 [LLM output] Full response returned by the model:")
@@ -99,6 +108,12 @@ def create_llm_call_node(llm: BaseLLM, tools: list):
         debug(f"  Content length: {len(str(response.content))} characters")
         debug("-"*80)
         debug(f"{response.content}")
+        # Print response statistics
+        info(f"⏱️ [LLM stats] Call duration: {elapsed_time:.2f}")
+        info(f"📊 [LLM stats] Token usage: input={input_tokens}, output={output_tokens}, total={input_tokens + output_tokens}")
+        if token_usage:
+            debug(f"📋 [LLM stats] Detailed usage: {token_usage}")
         debug("="*80 + "\n")
         result = {