feat: 实现真正的 LLM 流式 token 发送
Some checks failed
构建并部署 AI Agent 服务 / deploy (push) Has been cancelled

This commit is contained in:
2026-05-07 02:05:23 +08:00
parent 6332e30d2f
commit 6d7f8758d2
3 changed files with 176 additions and 69 deletions

View File

@@ -15,6 +15,7 @@ from ..model_services import get_cached_chat_services
from ..main_graph.main_graph_builder import build_agent_graph
from backend.app.logger import debug, info, warning, error
from ..main_graph.state import AgentState
from .stream_context import token_queue_var
class AIAgentService:
@@ -251,71 +252,107 @@ class AIAgentService:
chunk_count = 0
full_message_content = ""
try:
info(f"📡 开始调用 graph.astream()...")
event_count = 0
async for chunk in self.graph.astream(
input_state,
config=config,
stream_mode=["messages", "updates"],
version="v2",
subgraphs=True
):
chunk_count += 1
chunk_type = chunk["type"]
# 创建 token 队列
token_queue = asyncio.Queue()
# 设置上下文变量
token_queue_var.set(token_queue)
# 事件graph 执行完成
graph_done = asyncio.Event()
graph_error = None
async def run_graph():
"""在后台运行 graph并把 chunk 放进队列,同时也处理 events"""
nonlocal chunk_count, full_message_content, graph_error
try:
info(f"📡 开始调用 graph.astream()...")
# 记录原始 chunk 信息(前 10 个和后 10 个)
if chunk_count <= 10 or chunk_count % 50 == 0:
info(f" [{chunk_count}] chunk_type={chunk_type}, data={type(chunk.get('data'))}")
event_count = 0
async for chunk in self.graph.astream(
input_state,
config=config,
stream_mode=["messages", "updates"],
version="v2",
subgraphs=True
):
chunk_count += 1
chunk_type = chunk["type"]
# 记录原始 chunk 信息(前 10 个和后 10 个)
if chunk_count <= 10 or chunk_count % 50 == 0:
info(f" [{chunk_count}] chunk_type={chunk_type}, data={type(chunk.get('data'))}")
if chunk_type == "messages":
async for event in self._handle_message_chunk(
chunk, current_node, tool_calls_in_progress
):
if event.get("type") == "_update_state":
current_node = event.get("current_node", current_node)
else:
event_count += 1
# 记录前 10 个事件
if event_count <= 10:
info(f" → yield event #{event_count}: {event.get('type')}")
# 如果是 agent 节点的 token收集完整消息
if (
event.get("type") == "llm_token"
and event.get("node") == "agent"
and "token" in event
):
full_message_content += event["token"]
yield event
if chunk_type == "messages":
async for event in self._handle_message_chunk(
chunk, current_node, tool_calls_in_progress
):
if event.get("type") == "_update_state":
nonlocal current_node
current_node = event.get("current_node", current_node)
else:
event_count += 1
# 记录前 10 个事件
if event_count <= 10:
info(f" → yield event #{event_count}: {event.get('type')}")
# 如果是 agent 节点的 token收集完整消息
if (
event.get("type") == "llm_token"
and event.get("node") == "agent"
and "token" in event
):
full_message_content += event["token"]
await token_queue.put(event)
elif chunk_type == "updates":
async for event in self._handle_updates_chunk(
chunk, tool_calls_in_progress, actual_model_used
):
if event.get("type") == "_update_state":
actual_model_used = event.get("actual_model_used", actual_model_used)
else:
event_count += 1
if event_count <= 10:
info(f" → yield event #{event_count}: {event.get('type')}")
yield event
elif chunk_type == "updates":
async for event in self._handle_updates_chunk(
chunk, tool_calls_in_progress, actual_model_used
):
if event.get("type") == "_update_state":
nonlocal actual_model_used
actual_model_used = event.get("actual_model_used", actual_model_used)
else:
event_count += 1
if event_count <= 10:
info(f" → yield event #{event_count}: {event.get('type')}")
await token_queue.put(event)
# 完整消息集合完成后,一次性打印
info(f"✅ graph.astream() 完成,共 {chunk_count} 个 chunks, {event_count} 个 events")
if full_message_content:
info(f"📄 完整消息内容: {repr(full_message_content)}")
# 完整消息集合完成后,一次性打印
info(f"✅ graph.astream() 完成,共 {chunk_count} 个 chunks, {event_count} 个 events")
if full_message_content:
info(f"📄 完整消息内容: {repr(full_message_content)}")
except Exception as e:
error(f"❌ 执行图时出错: {e}")
import traceback
error(f"📋 堆栈: {traceback.format_exc()}")
graph_error = e
await token_queue.put({
"type": "error",
"message": str(e)
})
finally:
graph_done.set()
# 启动后台任务运行 graph
graph_task = asyncio.create_task(run_graph())
try:
# 从队列里取事件并 yield
while True:
# 尝试从队列取事件,超时检查 graph 是否完成
try:
event = await asyncio.wait_for(token_queue.get(), timeout=0.1)
yield event
except asyncio.TimeoutError:
# 检查 graph 是否完成
if graph_done.is_set():
break
# 如果 graph 有错误,已经在 run_graph 里 yield error 了
except Exception as e:
error(f"❌ 执行图时出错: {e}")
import traceback
error(f"📋 堆栈: {traceback.format_exc()}")
yield {
"type": "error",
"message": str(e)
}
finally:
# 无论成功或失败,都发送结束事件,保证前端平稳关闭
if current_node:
@@ -327,3 +364,5 @@ class AIAgentService:
"type": "done",
"model_used": actual_model_used
}
# 取消任务
graph_task.cancel()

View File

@@ -0,0 +1,9 @@
"""Streaming context used to pass the token callback between LangGraph nodes and agent_service."""
import contextvars
import asyncio
from typing import Optional, Any

# Context variable that stores the current token queue.
# Reading it yields None until a queue has been set in the current context.
token_queue_var: contextvars.ContextVar[Optional[asyncio.Queue]] = (
    contextvars.ContextVar("token_queue", default=None)
)