Add long-term storage, streaming check

2026-04-17 01:26:05 +08:00
parent 602d551fd1
commit 404efde282
37 changed files with 794 additions and 2095 deletions


@@ -32,15 +32,19 @@ def create_llm_call_node(llm: BaseLLM, tools: list):
     # Build the invocation chain
     prompt = create_system_prompt()
     llm_with_tools = llm.bind_tools(tools)
-    chain = prompt | RunnableLambda(print_llm_input) | llm_with_tools
-    async def call_llm(state: MessagesState, runtime: Runtime[GraphContext]) -> Dict[str, Any]:
+    # Reverted: the RunnableLambda is dropped from the chain; astream below iterates instead
+    chain = prompt | llm_with_tools
+    from langchain_core.runnables.config import RunnableConfig
+    async def call_llm(state: MessagesState, config: RunnableConfig) -> Dict[str, Any]:
         """
         LLM call node (async)

         Args:
             state: the current conversation state
-            runtime: the LangGraph runtime context
+            config: config auto-injected by LangChain/LangGraph, carrying callbacks etc.

         Returns:
             the updated state dict
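The signature change above is what enables the streaming rework in the next hunk: when a LangGraph node declares a parameter annotated with RunnableConfig, the framework injects the active run's config, and forwarding it into any runnable keeps that run's callbacks attached so token events can surface. A minimal sketch of the pattern under assumed names (my_node and the RunnableLambda stand-in chain are hypothetical, not from this repo):

import asyncio
from typing import Any, Dict
from langchain_core.runnables import RunnableLambda
from langchain_core.runnables.config import RunnableConfig

# Hypothetical stand-in for `prompt | llm_with_tools`.
some_chain = RunnableLambda(lambda x: f"echo: {x['text']}")

async def my_node(state: dict, config: RunnableConfig) -> Dict[str, Any]:
    # LangGraph injects `config` automatically because of the annotation;
    # forwarding it keeps callbacks (tracing, token streaming) attached.
    result = await some_chain.ainvoke({"text": state["text"]}, config=config)
    return {"text": result}

print(asyncio.run(my_node({"text": "hi"}, config={})))  # {'text': 'echo: hi'}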
@@ -48,17 +52,28 @@ def create_llm_call_node(llm: BaseLLM, tools: list):
         log_state_change("llm_call", state, "enter")
         memory_context = state.get("memory_context", "No user information yet")
-        loop = asyncio.get_event_loop()
         start_time = time.time()
         try:
-            response = await loop.run_in_executor(
-                None,
-                lambda: chain.invoke({
+            # Reverted to manually astream-ing and stitching all chunks into the final response.
+            # LangGraph automatically listens for every token produced along the way.
+            chunks = []
+            async for chunk in chain.astream(
+                {
                     "messages": state["messages"],
                     "memory_context": memory_context
-                })
-            )
+                },
+                config=config
+            ):
+                chunks.append(chunk)
+            # Merge all chunks into the final AIMessage
+            if chunks:
+                response = chunks[0]
+                for chunk in chunks[1:]:
+                    response = response + chunk
+            else:
+                response = AIMessage(content="")
         elapsed_time = time.time() - start_time
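To see the payoff of the astream-plus-config pattern from the caller's side, here is a hedged, self-contained sketch: it wires one node into a MessagesState graph and reads tokens with stream_mode="messages". GenericFakeChatModel stands in for the real LLM; none of these names come from this repo, and the sketch assumes langgraph and langchain-core are installed:

import asyncio
from langchain_core.language_models.fake_chat_models import GenericFakeChatModel
from langchain_core.messages import AIMessage
from langgraph.graph import StateGraph, MessagesState, START, END

# Fake model that streams "hello world" token by token.
model = GenericFakeChatModel(messages=iter([AIMessage(content="hello world")]))

async def llm_call(state: MessagesState, config):
    # Same pattern as the diff: astream with the injected config, merge chunks.
    chunks = [chunk async for chunk in model.astream(state["messages"], config=config)]
    response = chunks[0]
    for c in chunks[1:]:
        response = response + c
    return {"messages": [response]}

builder = StateGraph(MessagesState)
builder.add_node("llm_call", llm_call)
builder.add_edge(START, "llm_call")
builder.add_edge("llm_call", END)
graph = builder.compile()

async def main():
    # stream_mode="messages" yields (message_chunk, metadata) pairs as the
    # node's model produces them, not only after the node returns.
    async for chunk, metadata in graph.astream(
        {"messages": [("user", "hi")]}, stream_mode="messages"
    ):
        print(chunk.content, end="|")

asyncio.run(main())

The per-chunk `response + chunk` merge works because AIMessageChunk defines __add__, concatenating content and merging tool-call fragments into one message.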
@@ -85,13 +100,7 @@ def create_llm_call_node(llm: BaseLLM, tools: list):
         if token_usage:
             input_tokens = token_usage.get('prompt_tokens', token_usage.get('input_tokens', 0))
             output_tokens = token_usage.get('completion_tokens', token_usage.get('output_tokens', 0))
-        # Print response statistics
-        info(f"⏱️ [LLM stats] Call duration: {elapsed_time:.2f}")
-        info(f"📊 [LLM stats] Token usage: input={input_tokens}, output={output_tokens}, total={input_tokens + output_tokens}")
-        if token_usage:
-            debug(f"📋 [LLM stats] Detailed usage: {token_usage}")
         # Print the LLM's full output
         debug("\n" + "="*80)
         debug("📥 [LLM output] Full response returned by the model:")
@@ -99,6 +108,12 @@ def create_llm_call_node(llm: BaseLLM, tools: list):
         debug(f"  Content length: {len(str(response.content))} characters")
         debug("-"*80)
         debug(f"{response.content}")
+        # Print response statistics
+        info(f"⏱️ [LLM stats] Call duration: {elapsed_time:.2f}")
+        info(f"📊 [LLM stats] Token usage: input={input_tokens}, output={output_tokens}, total={input_tokens + output_tokens}")
+        if token_usage:
+            debug(f"📋 [LLM stats] Detailed usage: {token_usage}")
         debug("="*80 + "\n")
         result = {