Files
ailine/backend/app/main_graph/nodes/hybrid_router.py
root d96301e4d5
All checks were successful
构建并部署 AI Agent 服务 / deploy (push) Successful in 5m36s
重构:增强 JSON 解析稳定性,优化 Prompt,改进状态结构
主要改进:

1. 新增 json_parser.py - 统一的 JSON 解析工具
   - 支持多种格式(纯 JSON、markdown、文本中的 JSON)
   - 多层 fallback 策略
   - 安全的字段提取函数

2. 优化 intent.py 和 hybrid_router.py
   - 使用新的 json_parser
   - 优化 Prompt,更清晰的格式要求
   - 更好的错误处理

3. 改进 state.py - 新增结构化状态字段
   - ReactReasoningState、HybridRouterState、FastPathState
   - 向后兼容旧的 debug_info

4. 更新各节点模块 - 同时更新旧字段保持兼容
   - reasoning.py - 更新 state.react_reasoning
   - hybrid_router.py - 更新 state.hybrid_router
   - fast_paths.py - 更新 state.fast_path
2026-05-06 13:34:32 +08:00

246 lines
8.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
混合路由节点模块 - 前置路由决策
负责决定走快速路径还是 React 循环
"""
import re
import json
from typing import Optional
from dataclasses import dataclass, field
from datetime import datetime
from langchain_core.runnables.config import RunnableConfig
from ..state import MainGraphState
from backend.app.logger import info, debug
from ...model_services.chat_services import get_small_llm_service
from ._utils import dispatch_custom_event
from backend.app.core.json_parser import extract_and_parse_json, safe_get, safe_get_float, safe_get_str
# ========== 核心数据类型 ==========
@dataclass
class HybridRouterResult:
    """Outcome of the hybrid routing step.

    Bundles the classified intent with the graph path chosen for it.
    """

    # One of: chitchat / knowledge / tool / complex.
    intent: str = "complex"
    # Confidence attached to the classification.
    confidence: float = 0.0
    # Suggested sub-graph names; each instance gets its own fresh list.
    suggested_tools: list = field(default_factory=list)
    # One of: fast_chitchat / fast_rag / fast_tool / react_loop.
    path: str = "react_loop"
    # Free-text explanation of why this route was picked.
    reasoning: str = ""
# ========== 规则配置 ==========
# Lower-case phrases that mark a query as small talk.
CHITCHAT_KEYWORDS = {
    # Greetings
    "你好",
    "您好",
    "hi",
    "hello",
    "hey",
    "早上好",
    "晚上好",
    "下午好",
    # Thanks
    "谢谢",
    "感谢",
    "多谢",
    "thanks",
    "thank you",
    # Farewells
    "再见",
    "拜拜",
    "goodbye",
    "bye",
}
# Trigger keywords for each sub-graph, keyed by sub-graph name.
SUBGRAPH_KEYWORDS = {
    "contact": [
        "通讯录", "联系人", "contact", "email", "邮件", "邮箱",
    ],
    "dictionary": [
        "词典", "单词", "翻译", "dictionary", "translate", "生词",
    ],
    "news_analysis": [
        "资讯", "新闻", "分析", "news", "report", "热点",
    ],
}
# ========== 意图分类 Prompt 模板 ==========
# Prompt template for LLM intent classification. It is rendered with
# ``str.format(query=...)``, so the literal braces of the JSON example are
# doubled. The bracket pairs in the section headers and parenthetical notes
# are kept balanced so each instruction reads as a separate sentence.
INTENT_CLASSIFICATION_PROMPT = """你是一个专业的意图分类助手。请分析用户的查询,并输出 JSON 格式的结果。
【格式要求】
你必须严格输出 JSON 格式,不要加任何 Markdown 代码块标记(如 ```json)。
仅输出纯 JSON 字符串,不要有其他解释文字。
【意图类型(4选一)】
- chitchat: 闲聊、问候、感谢、道别(不需要工具)
- knowledge: 知识查询(需要查询知识库)
- tool: 工具操作(需要调用通讯录/词典/新闻等子图)
- complex: 复杂任务(多步骤、不确定、或需要推理)
【输出格式】
{{
"intent": "chitchat|knowledge|tool|complex",
"confidence": 0.85,
"reasoning": "简要说明理由",
"suggested_tools": ["contact|dictionary|news_analysis", "other"]
}}
【重要提示】
- 如果不能100%确定意图,请选择 "complex",置信度设低一些。
- confidence 是你对当前分类的信心(0.0-1.0)。
- suggested_tools 仅在 intent=tool 时提供,否则设为空数组。
【用户查询】
{query}
【现在开始】
请根据以上信息,输出你的分类 JSON"""
# ========== Rule-based routing (<5 ms) ==========
def _contains_keyword(text: str, keyword: str) -> bool:
    """Return True when ``keyword`` occurs in the already lower-cased ``text``.

    Non-ASCII keywords (e.g. Chinese phrases) use plain substring matching.
    ASCII keywords must not be glued to other ASCII letters or digits, so
    "hi" matches "hi there" and "你好hi" but not "this" or "which".
    """
    if not keyword.isascii():
        return keyword in text
    # Explicit ASCII lookarounds instead of ``\b``: in Unicode mode ``\b``
    # treats CJK characters as word characters, which would stop "hi" from
    # matching when it sits directly next to Chinese text.
    pattern = rf"(?<![a-z0-9]){re.escape(keyword)}(?![a-z0-9])"
    return re.search(pattern, text) is not None


def _rule_based_redirect(query: str) -> Optional[HybridRouterResult]:
    """Route obviously simple queries by rule, without calling the LLM.

    Rules are tried in order; the first hit wins:

    1. Small-talk keyword -> ``fast_chitchat`` (confidence 1.0).
    2. Sub-graph keyword  -> ``fast_tool`` with that sub-graph suggested
       (confidence 0.9).
    3. Very short query   -> ``react_loop`` (confidence 0.3).

    Args:
        query: Raw user query text.

    Returns:
        The routing decision, or ``None`` when no rule applies.
    """
    query_clean = query.strip().lower()

    # 1. Small talk. ASCII keywords are matched as whole words so that short
    #    ones such as "hi" or "hey" do not fire inside "this" or "they".
    # NOTE(review): a greeting anywhere in the query still wins over the
    # sub-graph rule below (e.g. a thank-you followed by a real request).
    if any(_contains_keyword(query_clean, kw) for kw in CHITCHAT_KEYWORDS):
        return HybridRouterResult(
            intent="chitchat",
            confidence=1.0,
            path="fast_chitchat",
            reasoning="规则匹配:闲聊类请求"
        )

    # 2. Sub-graph keywords. Plain substring matching is kept here on purpose
    #    so that inflected forms ("emails", "contacts") keep matching.
    for subgraph_name, keywords in SUBGRAPH_KEYWORDS.items():
        if any(kw in query_clean for kw in keywords):
            return HybridRouterResult(
                intent="tool",
                confidence=0.9,
                suggested_tools=[subgraph_name],
                path="fast_tool",
                reasoning=f"规则匹配:{subgraph_name} 子图关键词"
            )

    # 3. Too short to classify; both the ASCII and the full-width question
    #    mark count as a question ending.
    is_short_question = query_clean.endswith(("?", "?")) and len(query_clean) < 5
    if len(query_clean) < 3 or is_short_question:
        return HybridRouterResult(
            intent="complex",
            confidence=0.3,
            path="react_loop",
            reasoning="规则匹配:问题过于简短"
        )

    return None
# ========== LLM 分类 ==========
async def _classify_with_llm(query: str) -> HybridRouterResult:
    """Classify the query's intent with the small LLM service.

    The reply content is run through ``extract_and_parse_json``. An
    unsuccessful parse, empty parsed data, or any exception raised along
    the way yields the default result instead.

    Args:
        query: User query text inserted into the classification prompt.

    Returns:
        The parsed classification, or the default fallback result.
    """
    try:
        service = get_small_llm_service()
        rendered_prompt = INTENT_CLASSIFICATION_PROMPT.format(query=query)
        reply = await service.ainvoke(rendered_prompt)

        parsed = extract_and_parse_json(reply.content)
        if parsed.success and parsed.data:
            return _parse_classification_result(parsed.data)
        return _default_result()
    except Exception as e:
        # Best-effort classification: a failure here only downgrades routing.
        debug(f"LLM 分类失败: {e}")
        return _default_result()
def _parse_classification_result(data: dict) -> HybridRouterResult:
    """Turn the parsed LLM JSON into a validated routing decision.

    The LLM output is untrusted, so every field is normalised:

    * ``intent`` is trimmed and lower-cased; anything outside the known fast
      path intents becomes ``"complex"``.
    * ``confidence`` is clamped to ``[0.0, 1.0]``; a non-numeric or NaN value
      becomes ``0.0``.
    * ``suggested_tools`` is coerced to a list of non-empty strings.

    A confidence below 0.5 always downgrades the intent to ``"complex"``.

    Args:
        data: Parsed classification payload.

    Returns:
        The routing decision; unknown or low-confidence intents map to
        ``react_loop``.
    """
    # NOTE(review): the safe_get* helpers are assumed to return the given
    # default when the key is missing or malformed - confirm in json_parser.
    intent = safe_get_str(data, "intent", "complex")
    confidence = safe_get_float(data, "confidence", 0.3)
    suggested_tools = safe_get(data, "suggested_tools", [])
    reasoning = safe_get_str(data, "reasoning", "")

    # intent -> path mapping; every other intent goes to the React loop.
    path_map = {
        "chitchat": "fast_chitchat",
        "knowledge": "fast_rag",
        "tool": "fast_tool",
    }

    # Accept variants such as "Chitchat " and keep intent and path in sync.
    intent = intent.strip().lower() if isinstance(intent, str) else "complex"
    if intent not in path_map:
        intent = "complex"

    # ``confidence != confidence`` is only true for NaN.
    if not isinstance(confidence, (int, float)) or confidence != confidence:
        confidence = 0.0
    confidence = min(max(confidence, 0.0), 1.0)

    # The model may answer with a bare string or null instead of an array.
    if isinstance(suggested_tools, str):
        suggested_tools = [suggested_tools]
    elif not isinstance(suggested_tools, (list, tuple)):
        suggested_tools = []
    suggested_tools = [
        tool for tool in suggested_tools if isinstance(tool, str) and tool
    ]

    # Below the confidence threshold, fall back to complex.
    if confidence < 0.5:
        intent = "complex"

    return HybridRouterResult(
        intent=intent,
        confidence=confidence,
        suggested_tools=suggested_tools,
        path=path_map.get(intent, "react_loop"),
        reasoning=reasoning
    )
def _default_result() -> HybridRouterResult:
    """Build the fallback decision used when LLM classification fails.

    Returns:
        A "complex" intent with confidence 0.3, routed to ``react_loop``.
    """
    fallback = HybridRouterResult(
        path="react_loop",
        intent="complex",
        confidence=0.3,
        reasoning="LLM 调用失败,降级到 React 循环",
    )
    return fallback
# ========== 主路由节点 ==========
async def hybrid_router_node(state: MainGraphState, config: Optional[RunnableConfig] = None) -> MainGraphState:
    """Front routing node: choose a fast path or the React loop.

    Rule-based routing is tried first; the LLM classifier is used only when
    no rule matches. The decision is written to both the legacy
    ``state.debug_info`` entries and the structured ``state.hybrid_router``
    fields, then announced through the ``intent_classified`` and
    ``path_decision`` custom events.

    Args:
        state: Graph state; ``user_query`` is read, ``current_phase``,
            ``debug_info`` and ``hybrid_router`` are updated in place.
        config: Optional runnable config forwarded to the event dispatcher.

    Returns:
        The same ``state`` object, updated.
    """
    state.current_phase = "hybrid_router"
    query = state.user_query or ""
    info(f"[Hybrid Router] 开始路由: {query[:50]}...")

    # 1. Rule-based routing.
    decision = _rule_based_redirect(query)
    if decision:
        info(f"[Hybrid Router] 规则命中: {decision.path}")
    else:
        # 2. LLM classification.
        info("[Hybrid Router] 规则未命中,使用 LLM 分类")
        decision = await _classify_with_llm(query)

    # 3. Update state. One timestamp is taken so that the legacy and the
    #    structured field always carry the same value.
    started_at = datetime.now().isoformat()
    state.debug_info["hybrid_decision"] = {
        "intent": decision.intent,
        "confidence": decision.confidence,
        "path": decision.path,
        "reasoning": decision.reasoning,
        "suggested_tools": decision.suggested_tools
    }
    state.debug_info["hybrid_start_time"] = started_at
    state.hybrid_router.decision = decision
    state.hybrid_router.start_time = started_at

    # 4. Emit events.
    await dispatch_custom_event("intent_classified", {
        "intent": decision.intent,
        "confidence": decision.confidence,
        "reasoning": decision.reasoning,
        "suggested_tools": decision.suggested_tools
    }, config)
    await dispatch_custom_event("path_decision", {
        "path": decision.path,
        "intent": decision.intent,
        "reasoning": decision.reasoning
    }, config)

    info(f"[Hybrid Router] 路由决策: {decision.path} (intent={decision.intent}, confidence={decision.confidence})")
    return state
# ========== 条件路由函数 ==========
def route_from_hybrid_decision(state: MainGraphState) -> str:
    """Return the next node name chosen by the hybrid router.

    Reads ``state.debug_info["hybrid_decision"]["path"]``. Falls back to
    ``"react_loop"`` when the decision is missing, is not a dict, or has no
    usable ``path``, so routing always gets a string.

    Args:
        state: Graph state carrying ``debug_info``.

    Returns:
        The recorded path, or ``"react_loop"`` as the safe default.
    """
    debug_info = getattr(state, "debug_info", None) or {}
    decision = debug_info.get("hybrid_decision")
    if not isinstance(decision, dict):
        return "react_loop"
    # ``or`` also covers an explicit None / empty path value.
    return decision.get("path") or "react_loop"
def check_fast_path_success(state: MainGraphState) -> str:
    """Decide where to go after a fast path has run.

    Args:
        state: Graph state; ``debug_info["fast_path_failed"]`` is read.

    Returns:
        ``"escalate"`` when the failure flag is truthy, else ``"llm_call"``.
    """
    failed = state.debug_info.get("fast_path_failed")
    if not failed:
        info("[Fast Path Check] 快速路径成功,进入 llm_call")
        return "llm_call"

    info("[Fast Path Check] 快速路径失败,升级到 React 循环")
    return "escalate"
# ========== 导出 ==========
# Names exported by ``from <module> import *``: the routing node, the two
# conditional-edge functions and the result dataclass. The underscore-prefixed
# helpers of this module are not listed.
__all__ = [
    "hybrid_router_node",
    "route_from_hybrid_decision",
    "check_fast_path_success",
    "HybridRouterResult",
]