重构：增强 JSON 解析稳定性，优化 Prompt，改进状态结构

主要改进：
1. 新增 json_parser.py - 统一的 JSON 解析工具
   - 支持多种格式（纯 JSON、markdown、文本中的 JSON）
   - 多层 fallback 策略
   - 安全的字段提取函数
2. 优化 intent.py 和 hybrid_router.py
   - 使用新的 json_parser
   - 优化 Prompt，更清晰的格式要求
   - 更好的错误处理
3. 改进 state.py
   - 新增结构化状态字段
   - ReactReasoningState、HybridRouterState、FastPathState
   - 向后兼容旧的 debug_info
4. 更新各节点模块
   - 同时更新旧字段保持兼容
   - reasoning.py - 更新 state.react_reasoning
   - hybrid_router.py - 更新 state.hybrid_router
   - fast_paths.py - 更新 state.fast_path
2026-05-06 13:34:32 +08:00
parent 13e1d03741
commit d96301e4d5
6 changed files with 409 additions and 105 deletions
--- a/backend/app/main_graph/nodes/hybrid_router.py
+++ b/backend/app/main_graph/nodes/hybrid_router.py
@@ -14,6 +14,7 @@ from ..state import MainGraphState
 from backend.app.logger import info, debug
 from ...model_services.chat_services import get_small_llm_service
 from ._utils import dispatch_custom_event
+from backend.app.core.json_parser import extract_and_parse_json, safe_get, safe_get_float, safe_get_str


 # ========== 核心数据类型 ==========
@@ -44,24 +45,34 @@ SUBGRAPH_KEYWORDS = {
 # ========== 意图分类 Prompt 模板 ==========
 INTENT_CLASSIFICATION_PROMPT = """你是一个专业的意图分类助手。请分析用户的查询，并输出 JSON 格式的结果。

-意图类型（4选一）：
+【格式要求】
+你必须严格输出 JSON 格式，不要加任何 Markdown 代码块标记（如 ```json）。
+仅输出纯 JSON 字符串，不要有其他解释文字。
+
+【意图类型（4选一）】
 - chitchat: 闲聊、问候、感谢、道别（不需要工具）
 - knowledge: 知识查询（需要查询知识库）
 - tool: 工具操作（需要调用通讯录/词典/新闻等子图）
 - complex: 复杂任务（多步骤、不确定、或需要推理）

-用户查询:
-{query}
-
-输出格式（仅 JSON，不要其他内容）：
+【输出格式】
 {{
    "intent": "chitchat|knowledge|tool|complex",
-    "confidence": 0.0-1.0,
+    "confidence": 0.85,
    "reasoning": "简要说明理由",
    "suggested_tools": ["contact|dictionary|news_analysis", "other"]
 }}

-注意：如果不能100%确定意图，请选择 "complex"，置信度设低一些。"""
+【重要提示】
+- 如果不能100%确定意图，请选择 "complex"，置信度设低一些。
+- confidence 是你对当前分类的信心（0.0-1.0）。
+- suggested_tools 仅在 intent=tool 时提供，否则设为空数组。
+
+【用户查询】
+{query}
+
+【现在开始】
+请根据以上信息，输出你的分类 JSON："""


 # ========== 规则分流（<5ms） ==========
@@ -109,13 +120,12 @@ async def _classify_with_llm(query: str) -> HybridRouterResult:
        prompt = INTENT_CLASSIFICATION_PROMPT.format(query=query)
        response = await llm.ainvoke(prompt)

-        # 解析 JSON
-        json_match = re.search(r'\{[\s\S]*?\}', response.content)
-        if not json_match:
+        # 使用新的 JSON 解析器
+        parse_result = extract_and_parse_json(response.content)
+        if not parse_result.success or not parse_result.data:
            return _default_result()

-        data = json.loads(json_match.group())
-        return _parse_classification_result(data)
+        return _parse_classification_result(parse_result.data)

    except Exception as e:
        debug(f"LLM 分类失败: {e}")
@@ -124,8 +134,10 @@ async def _classify_with_llm(query: str) -> HybridRouterResult:

 def _parse_classification_result(data: dict) -> HybridRouterResult:
    """解析分类结果"""
-    intent = data.get("intent", "complex")
-    confidence = float(data.get("confidence", 0.3))
+    intent = safe_get_str(data, "intent", "complex")
+    confidence = safe_get_float(data, "confidence", 0.3)
+    suggested_tools = safe_get(data, "suggested_tools", [])
+    reasoning = safe_get_str(data, "reasoning", "")

    # 置信度低于阈值，走 complex
    if confidence < 0.5:
@@ -141,9 +153,9 @@ def _parse_classification_result(data: dict) -> HybridRouterResult:
    return HybridRouterResult(
        intent=intent,
        confidence=confidence,
-        suggested_tools=data.get("suggested_tools", []),
+        suggested_tools=suggested_tools,
        path=path_map.get(intent, "react_loop"),
-        reasoning=data.get("reasoning", "")
+        reasoning=reasoning
    )


@@ -175,7 +187,7 @@ async def hybrid_router_node(state: MainGraphState, config: Optional[RunnableCon
        info("[Hybrid Router] 规则未命中，使用 LLM 分类")
        decision = await _classify_with_llm(query)

-    # 3. 更新状态
+    # 3. 更新状态（同时更新旧的 debug_info 和新的结构化字段）
    state.debug_info["hybrid_decision"] = {
        "intent": decision.intent,
        "confidence": decision.confidence,
@@ -184,6 +196,10 @@ async def hybrid_router_node(state: MainGraphState, config: Optional[RunnableCon
        "suggested_tools": decision.suggested_tools
    }
    state.debug_info["hybrid_start_time"] = datetime.now().isoformat()
+    
+    # 更新新的结构化字段
+    state.hybrid_router.decision = decision
+    state.hybrid_router.start_time = datetime.now().isoformat()

    # 4. 发送事件
    await dispatch_custom_event("intent_classified", {