✨ 完整的混合路由优化系统

1. 双模型服务 (llm + smallLLM)
   - 增加 get_small_llm_service() 函数
   - 支持智谱/DeepSeek 小模型作为轻量级选项
2. 前置混合路由
   - 规则快速分流（无 LLM，超快速）
   - 轻量级意图分类（smallLLM）
   - 快速路径：fast_chitchat, fast_rag, fast_tool
3. 自动升级机制
   - 快速路径失败 → 自动回到 React 循环
   - SSE 事件增强：intent_classified, path_decision, fast_path_*, escalation
4. 向后兼容
   - build_react_main_graph(use_hybrid_router=True/False)
   - 可选择启用或禁用混合路由
5. 更新 intent.py
   - 支持 use_small_llm 参数
   - 保留原有完整功能供 React 循环使用
2026-05-03 16:45:46 +08:00
parent 9c53f58165
commit a5fc9cd5d8
5 changed files with 928 additions and 63 deletions
--- a/backend/app/core/intent.py
+++ b/backend/app/core/intent.py
@@ -71,23 +71,34 @@ class ReactIntentReasoner:
    2. 决定是否需要 RAG 检索/重新检索
    3. 决定是否需要路由到子图
    4. 提供降级策略（规则匹配）
+    
+    可以选择使用大模型或小模型
    """
-
-    def __init__(self):
-        """初始化推理器 - 懒加载 LLM 服务"""
+    
+    def __init__(self, use_small_llm: bool = False):
+        """
+        初始化推理器
+        
+        Args:
+            use_small_llm: 是否使用轻量级模型（用于意图分类）
+        """
        self._llm_service = None
+        self._use_small_llm = use_small_llm
        self._subgraph_keywords = {
            "contact": ["通讯录", "联系人", "contact", "email", "邮件", "邮箱"],
            "dictionary": ["词典", "单词", "翻译", "dictionary", "translate", "生词"],
            "news_analysis": ["资讯", "新闻", "分析", "news", "report", "热点"],
            "research": ["研究", "深度分析", "报告", "引用", "溯源", "research", "analyze", "report"]
        }
-
+    
    def _get_llm_service(self):
        """懒加载 LLM 服务（避免循环导入）"""
        if self._llm_service is None:
-            from app.model_services.chat_services import get_chat_service
-            self._llm_service = get_chat_service()
+            from app.model_services.chat_services import get_chat_service, get_small_llm_service
+            if self._use_small_llm:
+                self._llm_service = get_small_llm_service()
+            else:
+                self._llm_service = get_chat_service()
        return self._llm_service

    async def reason(
@@ -320,19 +331,34 @@ class ReactIntentReasoner:

 # 全局推理器实例（懒加载）
 _reasoner: Optional[ReactIntentReasoner] = None
+_small_reasoner: Optional[ReactIntentReasoner] = None


-def _get_reasoner() -> ReactIntentReasoner:
-    """获取推理器实例"""
-    global _reasoner
-    if _reasoner is None:
-        _reasoner = ReactIntentReasoner()
-    return _reasoner
+def _get_reasoner(use_small_llm: bool = False) -> ReactIntentReasoner:
+    """
+    获取推理器实例
+    
+    Args:
+        use_small_llm: 是否使用轻量级模型
+    
+    Returns:
+        ReactIntentReasoner 实例
+    """
+    global _reasoner, _small_reasoner
+    if use_small_llm:
+        if _small_reasoner is None:
+            _small_reasoner = ReactIntentReasoner(use_small_llm=True)
+        return _small_reasoner
+    else:
+        if _reasoner is None:
+            _reasoner = ReactIntentReasoner(use_small_llm=False)
+        return _reasoner


 async def react_reason_async(
    query: str,
-    context: Optional[Dict[str, Any]] = None
+    context: Optional[Dict[str, Any]] = None,
+    use_small_llm: bool = False
 ) -> ReasoningResult:
    """
    便捷函数：异步 React 推理（推荐使用）
@@ -340,17 +366,19 @@ async def react_reason_async(
    Args:
        query: 用户查询
        context: 上下文
+        use_small_llm: 是否使用轻量级模型
    
    Returns:
        ReasoningResult
    """
-    reasoner = _get_reasoner()
+    reasoner = _get_reasoner(use_small_llm=use_small_llm)
    return await reasoner.reason(query, context)


 def react_reason(
    query: str,
-    context: Optional[Dict[str, Any]] = None
+    context: Optional[Dict[str, Any]] = None,
+    use_small_llm: bool = False
 ) -> ReasoningResult:
    """
    便捷函数：同步 React 推理（保持向后兼容）
@@ -360,33 +388,33 @@ def react_reason(
    Args:
        query: 用户查询
        context: 上下文
+        use_small_llm: 是否使用轻量级模型
    
    Returns:
        ReasoningResult
    """
    import asyncio
-
+    
    try:
        # 尝试获取现有事件循环
        loop = asyncio.get_event_loop()
        if loop.is_running():
            # 已经在运行的循环中，创建任务
-            task = loop.create_task(react_reason_async(query, context))
            # 注意：这里不能真正等待，会导致死锁
            # 降级到规则推理
            print("[ReactReasoner] 检测到运行中的事件循环，使用规则推理")
-            reasoner = _get_reasoner()
+            reasoner = _get_reasoner(use_small_llm=use_small_llm)
            return reasoner._reason_with_rules(query, context or {})
    except RuntimeError:
        pass
-
+    
    # 创建新的事件循环
    loop = asyncio.new_event_loop()
    try:
        asyncio.set_event_loop(loop)
-        return loop.run_until_complete(react_reason_async(query, context))
+        return loop.run_until_complete(react_reason_async(query, context, use_small_llm=use_small_llm))
    finally:
        loop.close()


 def get_route_by_reasoning(result: ReasoningResult) -> str: