# ailine/backend/app/main_graph/nodes/rag_nodes.py

"""
RAG 检索节点模块
包含：RAG 检索、置信度判断、重检索等节点
"""

import time
import asyncio
from typing import Optional
from datetime import datetime
from langchain_core.runnables.config import RunnableConfig

from ...main_graph.state import MainGraphState, ErrorRecord, ErrorSeverity
from ...main_graph.utils.retry_utils import RAG_RETRY_CONFIG
from backend.app.logger import info, debug
from ...model_services import get_small_llm_service
from ._utils import dispatch_custom_event, make_react_event


# Confidence threshold configuration
RAG_CONFIDENCE_THRESHOLD = 0.6  # a retrieval scoring below this value is treated as not relevant


def _get_rag_tool() -> Optional[callable]:
    """Return whatever ``rag_initializer.get_rag_tool()`` currently provides.

    The initializer is imported at call time rather than at module import time.
    """
    from backend.app.main_graph.utils.rag_initializer import get_rag_tool as _load_rag_tool

    tool = _load_rag_tool()
    return tool


# ========== RAG 检索核心逻辑 ==========
async def _rag_retrieve_core(state: MainGraphState, rag_tool: callable) -> MainGraphState:
    """Run one retrieval through ``rag_tool`` and record the outcome on ``state``.

    The query sent to the tool is ``state.user_query``, unless
    ``state.debug_info["reasoning_result"]`` carries a ``retrieval_config`` with a
    non-empty ``retrieval_query``, which then takes precedence.

    Args:
        state: Graph state. ``rag_context``, ``rag_retrieved``, ``rag_attempts`` and
            ``debug_info["rag_source"]`` are written in place.
        rag_tool: Object exposing an awaitable ``ainvoke(query)``.

    Returns:
        The same ``state`` object that was passed in.
    """
    query = state.user_query

    # An optimized query produced by the reasoning step wins over the raw user query.
    reasoning = state.debug_info.get("reasoning_result")
    retrieval_cfg = getattr(reasoning, "retrieval_config", None) if reasoning else None
    if retrieval_cfg and retrieval_cfg.retrieval_query:
        query = retrieval_cfg.retrieval_query

    # Invoke the RAG tool.
    context = await rag_tool.ainvoke(query)
    context_len = len(context) if context else 0
    info(f"[RAG Core] 获取到 rag_context: {type(context)}, 长度={context_len}")

    # Persist the result and bump the attempt counter.
    state.rag_context = context
    state.rag_retrieved = True
    state.rag_attempts = getattr(state, 'rag_attempts', 0) + 1
    state.debug_info["rag_source"] = "tool"
    return state


# ========== RAG 检索节点 ==========
async def rag_retrieve_node(state: MainGraphState, config: Optional[RunnableConfig] = None) -> MainGraphState:
    """RAG retrieval node: run one retrieval and score its confidence.

    Sets ``state.current_phase`` to ``"rag_retrieving"``. If no RAG tool is available the
    node returns immediately with ``rag_confidence = 0.0`` and ``rag_retrieved = False``.
    Otherwise it dispatches a start event, performs the retrieval, stores the evaluated
    confidence, appends an entry to ``state.reasoning_history`` and dispatches a completion
    event. Any exception raised inside that retrieval/scoring/reporting step is logged and
    swallowed, leaving ``rag_confidence = 0.0`` and ``rag_retrieved = False``.

    Args:
        state: Graph state, updated in place.
        config: Runnable config forwarded to ``dispatch_custom_event``.

    Returns:
        The updated state.
    """
    state.current_phase = "rag_retrieving"

    rag_tool = _get_rag_tool()
    if not rag_tool:
        info("[RAG] RAG 工具未初始化")
        state.rag_confidence = 0.0
        state.rag_retrieved = False
        return state

    # NOTE: this dispatch sits outside the try block, so an error here propagates to the caller.
    await dispatch_custom_event(
        "react_reasoning",
        make_react_event(state.reasoning_step, "rag_retrieve_start", 1.0, "开始执行 RAG 检索..."),
        config
    )

    try:
        state = await _rag_retrieve_core(state, rag_tool)

        # Score the retrieved context.
        confidence = await _evaluate_rag_confidence(state)
        state.rag_confidence = confidence

        info(f"[RAG] 检索完成，置信度={confidence:.2f}，RAG尝试次数={state.rag_attempts}")

        state.reasoning_history.append({
            "step": state.reasoning_step,
            "action": "RETRIEVE_RAG",
            "confidence": confidence,
            "reasoning": f"RAG 检索完成，置信度={confidence:.2f}",
            "timestamp": datetime.now().isoformat()
        })

        await dispatch_custom_event(
            "react_reasoning",
            make_react_event(state.reasoning_step, "rag_retrieve_complete", confidence,
                           f"RAG 检索完成，置信度={confidence:.2f}"),
            config
        )

    except Exception as e:
        # Best effort: a failed retrieval is reported through the state, not raised.
        info(f"[RAG] 检索失败: {e}")
        state.rag_confidence = 0.0
        state.rag_retrieved = False

    return state


async def _evaluate_rag_confidence(state: MainGraphState) -> float:
    """Blend three relevance signals into one confidence score for the retrieved context.

    The result is ``0.3 * embedding + 0.3 * rerank + 0.4 * llm``, where the components come
    from ``_get_embedding_similarity``, ``_get_rerank_score`` and ``_get_llm_score``. Every
    component and the final value are logged.

    Args:
        state: Graph state holding ``rag_context`` and whatever the scoring helpers read.

    Returns:
        0.0 when ``state.rag_context`` is empty, otherwise the weighted sum.
    """
    if not state.rag_context:
        return 0.0

    # Signal 1: vector similarity (read from rag_docs).
    # The helper takes everything it needs from the state, so only the state is passed.
    embedding_score = _get_embedding_similarity(state)
    info(f"[RAG Confidence] 向量相似度={embedding_score:.3f}")

    # Signal 2: rerank score (read from rag_docs).
    rerank_score = _get_rerank_score(state)
    info(f"[RAG Confidence] 重排分数={rerank_score:.3f}")

    # Signal 3: small-model relevance judgement.
    llm_score = await _get_llm_score(state)
    info(f"[RAG Confidence] LLM评估={llm_score:.3f}")

    # Weighted sum: vector similarity 0.3, rerank 0.3, LLM 0.4.
    # NOTE(review): when rag_docs carries no scores both document-based signals are 0.0,
    # which caps the result at 0.4 * llm_score, below RAG_CONFIDENCE_THRESHOLD (0.6).
    # Confirm that rag_docs is populated upstream.
    final_score = embedding_score * 0.3 + rerank_score * 0.3 + llm_score * 0.4
    info(f"[RAG Confidence] 综合置信度={final_score:.3f} (embedding={embedding_score:.3f}*0.3 + rerank={rerank_score:.3f}*0.3 + llm={llm_score:.3f}*0.4)")

    return final_score


def _get_embedding_similarity(state: MainGraphState) -> float:
    """从 rag_docs 中获取向量相似度分数"""
    rag_docs = getattr(state, "rag_docs", [])

    # 如果有多个文档，取最高分
    scores = []
    for doc in rag_docs:
        if isinstance(doc, dict):
            score = doc.get("score", 0.0)
            # 向量相似度通常在 0-1 之间，RRF 分数可能更高
            # 归一化到 0-1
            if score > 1.0:
                score = min(score / 10.0, 1.0)  # 假设 max 约 10
            scores.append(score)
        elif hasattr(doc, "metadata"):
            score = doc.metadata.get("score", 0.0)
            if score > 1.0:
                score = min(score / 10.0, 1.0)
            scores.append(score)

    if scores:
        # 取平均或最高分
        return max(scores)  # 使用最高分更准确
    return 0.0


def _get_rerank_score(state: MainGraphState) -> float:
    """从 rag_docs 中获取重排序分数"""
    rag_docs = getattr(state, "rag_docs", [])

    # 重排分数通常在 0-1 之间
    scores = []
    for doc in rag_docs:
        if isinstance(doc, dict):
            score = doc.get("rerank_score", 0.0)
        elif hasattr(doc, "metadata"):
            score = doc.metadata.get("rerank_score", 0.0)
        else:
            score = 0.0

        if score > 0:
            scores.append(score)

    if scores:
        return max(scores)  # 使用最高分
    return 0.0


async def _get_llm_score(state: MainGraphState) -> float:
    """Ask the small LLM to rate how relevant the retrieved context is to the query.

    The prompt contains ``state.user_query`` and the first 1500 characters of
    ``state.rag_context``. The first number found in the reply is clamped to [0.0, 1.0].

    Args:
        state: Graph state providing ``user_query`` and ``rag_context``.

    Returns:
        The clamped score, or 0.5 when the call fails or the reply contains no number.
    """
    import re

    query = state.user_query or ""
    rag_context = state.rag_context or ""

    try:
        llm = get_small_llm_service()
        prompt = f"""评估以下检索结果与用户问题的相关性，返回 0.0-1.0 的分数：
- 1.0 = 完全相关，能直接回答问题
- 0.5 = 部分相关，有一定参考价值
- 0.0 = 完全不相关，无法回答问题

用户问题：{query}

检索结果：{rag_context[:1500]}

只返回一个数字："""

        response = await llm.ainvoke(prompt)
        content = response.content.strip()

        # Try the decimal form first so that a reply such as ".8" is read as 0.8
        # instead of the bare digit 8 (which would be clamped to 1.0).
        match = re.search(r'\d*\.\d+|\d+', content)
        if match:
            score = float(match.group(0))
            return max(0.0, min(1.0, score))

    except Exception as e:
        # Best effort: an unavailable or misbehaving model must not fail the retrieval.
        info(f"[RAG Confidence] LLM评估失败: {e}")

    return 0.5  # neutral default when no score could be obtained


# ========== 置信度判断节点 ==========
def check_rag_confidence(state: MainGraphState) -> str:
    """
    Pick the routing label that follows a RAG retrieval attempt.

    Returns:
        "no_rag" - nothing was retrieved (``rag_retrieved`` is falsy or ``rag_context`` is empty)
        "high_confidence" - confidence is not below RAG_CONFIDENCE_THRESHOLD
        "low_confidence" - confidence is below the threshold and RAG was attempted at least twice
        "retry_rag" - confidence is below the threshold and RAG was attempted fewer than twice
    """
    attempts = getattr(state, 'rag_attempts', 0)
    confidence = getattr(state, 'rag_confidence', 0.0)

    info(f"[Confidence Check] rag_attempts={attempts}, rag_confidence={confidence:.2f}")

    # Case 1: there is no retrieval result at all.
    has_result = getattr(state, 'rag_retrieved', False) and state.rag_context
    if not has_result:
        info("[Confidence Check] 无检索结果，走联网")
        return "no_rag"

    # Case 2: confidence reaches the threshold.
    below_threshold = confidence < RAG_CONFIDENCE_THRESHOLD
    if not below_threshold:
        info(f"[Confidence Check] 高置信度={confidence:.2f}>={RAG_CONFIDENCE_THRESHOLD}，直接生成回答")
        return "high_confidence"

    # Case 3: confidence is too low and the retry budget is used up.
    if attempts >= 2:
        info(f"[Confidence Check] 置信度={confidence:.2f}<{RAG_CONFIDENCE_THRESHOLD}，且RAG尝试{attempts}次，走联网")
        return "low_confidence"

    # Case 4: confidence is too low but one more RAG attempt is allowed.
    info(f"[Confidence Check] 置信度={confidence:.2f}<{RAG_CONFIDENCE_THRESHOLD}，可再尝试RAG一次")
    return "retry_rag"


# ========== Exports ==========
# Names made available by a star-import of this module.
__all__ = [
    "rag_retrieve_node",
    "check_rag_confidence",
    "RAG_CONFIDENCE_THRESHOLD",
]
-												refactor: 将 RAG 节点拆分为独立模块

- 新增 rag_nodes.py: 独立的 RAG 检索节点
- 从 react_nodes.py 移除 RAG 相关代码
- 更新导入和导出
- rag_nodes.py 包含 rag_retrieve_node 和 rag_re_retrieve_node
- 添加 inject_rag_tool_to_state 工具函数

											
										
										
											2026-04-26 11:23:12 +08:00
+								"""
-												修复循环推理bug

											
										
										
											2026-05-05 00:54:04 +08:00
+								RAG 检索节点模块
-												添加rag置信度判断

											
										
										
											2026-05-06 01:15:52 +08:00
+								包含：RAG 检索、置信度判断、重检索等节点
-												refactor: 将 RAG 节点拆分为独立模块

- 新增 rag_nodes.py: 独立的 RAG 检索节点
- 从 react_nodes.py 移除 RAG 相关代码
- 更新导入和导出
- rag_nodes.py 包含 rag_retrieve_node 和 rag_re_retrieve_node
- 添加 inject_rag_tool_to_state 工具函数

											
										
										
											2026-04-26 11:23:12 +08:00
+								"""
 								import time
-												refactor: 真正利用已有 RAG 代码重构 rag_nodes.py

- 真正导入和使用 backend/app/rag/tools.py
- 添加全局 RAG 工具管理（get/set_global_rag_tool）
- 集成 RAGPipeline，支持多路查询和重排序
- 兼容 rag_initializer.py 的初始化方式
- 移除模拟实现，使用真正的 RAG 功能

											
										
										
											2026-04-26 11:25:01 +08:00
+								import asyncio
-												refactor: 重构快速路径流程，统一通过 llm_call 输出

- 重构 fast_paths.py，让 fast_chitchat 和 fast_rag 都进入 llm_call 而不是直接设置 final_result
- 修改 check_fast_path_success 函数返回 'llm_call' 而不是 'success'
- 更新 main_graph_builder.py 的条件边配置，支持路由到 llm_call
- 在快速路径节点中添加清除 state.final_result 的逻辑，避免复用旧结果
- 重构 RAG 工具初始化方式，使用模块级变量管理
- 修改 finalize.py 让它返回 final_result
- 更新 agent_service.py 的 RAG 工具注入方式
- 简化 hybrid_router.py 的代码结构
- 清理 rag_nodes.py 的全局变量相关代码
- 更新相关测试文件

											
										
										
											2026-05-05 04:32:42 +08:00
+								from typing import Optional
-												refactor: 将 RAG 节点拆分为独立模块

- 新增 rag_nodes.py: 独立的 RAG 检索节点
- 从 react_nodes.py 移除 RAG 相关代码
- 更新导入和导出
- rag_nodes.py 包含 rag_retrieve_node 和 rag_re_retrieve_node
- 添加 inject_rag_tool_to_state 工具函数

											
										
										
											2026-04-26 11:23:12 +08:00
+								from datetime import datetime
-												导入方式修改

											
										
										
											2026-05-05 23:17:00 +08:00
+								from langchain_core.runnables.config import RunnableConfig
-												refactor: 将 RAG 节点拆分为独立模块

- 新增 rag_nodes.py: 独立的 RAG 检索节点
- 从 react_nodes.py 移除 RAG 相关代码
- 更新导入和导出
- rag_nodes.py 包含 rag_retrieve_node 和 rag_re_retrieve_node
- 添加 inject_rag_tool_to_state 工具函数

											
										
										
											2026-04-26 11:23:12 +08:00
-												导入方式修改

											
										
										
											2026-05-05 23:17:00 +08:00
+								from ...main_graph.state import MainGraphState, ErrorRecord, ErrorSeverity
 								from ...main_graph.utils.retry_utils import RAG_RETRY_CONFIG
-												添加rag置信度判断

											
										
										
											2026-05-06 01:15:52 +08:00
+								from backend.app.logger import info, debug
 								from ...model_services import get_small_llm_service
-												修复循环推理bug

											
										
										
											2026-05-05 00:54:04 +08:00
+								from ._utils import dispatch_custom_event, make_react_event
-												refactor: 将 RAG 节点拆分为独立模块

- 新增 rag_nodes.py: 独立的 RAG 检索节点
- 从 react_nodes.py 移除 RAG 相关代码
- 更新导入和导出
- rag_nodes.py 包含 rag_retrieve_node 和 rag_re_retrieve_node
- 添加 inject_rag_tool_to_state 工具函数

											
										
										
											2026-04-26 11:23:12 +08:00
-												添加rag置信度判断

											
										
										
											2026-05-06 01:15:52 +08:00
+								# 置信度阈值配置
 								RAG_CONFIDENCE_THRESHOLD = 0.6  # 低于此值认为检索不相关
-												refactor: 重构快速路径流程，统一通过 llm_call 输出

- 重构 fast_paths.py，让 fast_chitchat 和 fast_rag 都进入 llm_call 而不是直接设置 final_result
- 修改 check_fast_path_success 函数返回 'llm_call' 而不是 'success'
- 更新 main_graph_builder.py 的条件边配置，支持路由到 llm_call
- 在快速路径节点中添加清除 state.final_result 的逻辑，避免复用旧结果
- 重构 RAG 工具初始化方式，使用模块级变量管理
- 修改 finalize.py 让它返回 final_result
- 更新 agent_service.py 的 RAG 工具注入方式
- 简化 hybrid_router.py 的代码结构
- 清理 rag_nodes.py 的全局变量相关代码
- 更新相关测试文件

											
										
										
											2026-05-05 04:32:42 +08:00
+								def _get_rag_tool() -> Optional[callable]:
 								    """获取 RAG 工具"""
-												导入方式修改

											
										
										
											2026-05-05 23:17:00 +08:00
+								    from backend.app.main_graph.utils.rag_initializer import get_rag_tool
-												refactor: 重构快速路径流程，统一通过 llm_call 输出

- 重构 fast_paths.py，让 fast_chitchat 和 fast_rag 都进入 llm_call 而不是直接设置 final_result
- 修改 check_fast_path_success 函数返回 'llm_call' 而不是 'success'
- 更新 main_graph_builder.py 的条件边配置，支持路由到 llm_call
- 在快速路径节点中添加清除 state.final_result 的逻辑，避免复用旧结果
- 重构 RAG 工具初始化方式，使用模块级变量管理
- 修改 finalize.py 让它返回 final_result
- 更新 agent_service.py 的 RAG 工具注入方式
- 简化 hybrid_router.py 的代码结构
- 清理 rag_nodes.py 的全局变量相关代码
- 更新相关测试文件

											
										
										
											2026-05-05 04:32:42 +08:00
+								    return get_rag_tool()
-												refactor: 将 RAG 节点拆分为独立模块

- 新增 rag_nodes.py: 独立的 RAG 检索节点
- 从 react_nodes.py 移除 RAG 相关代码
- 更新导入和导出
- rag_nodes.py 包含 rag_retrieve_node 和 rag_re_retrieve_node
- 添加 inject_rag_tool_to_state 工具函数

											
										
										
											2026-04-26 11:23:12 +08:00
-												修复循环推理bug

											
										
										
											2026-05-05 00:54:04 +08:00
+								# ========== RAG 检索核心逻辑 ==========
-												refactor: 重构快速路径流程，统一通过 llm_call 输出

- 重构 fast_paths.py，让 fast_chitchat 和 fast_rag 都进入 llm_call 而不是直接设置 final_result
- 修改 check_fast_path_success 函数返回 'llm_call' 而不是 'success'
- 更新 main_graph_builder.py 的条件边配置，支持路由到 llm_call
- 在快速路径节点中添加清除 state.final_result 的逻辑，避免复用旧结果
- 重构 RAG 工具初始化方式，使用模块级变量管理
- 修改 finalize.py 让它返回 final_result
- 更新 agent_service.py 的 RAG 工具注入方式
- 简化 hybrid_router.py 的代码结构
- 清理 rag_nodes.py 的全局变量相关代码
- 更新相关测试文件

											
										
										
											2026-05-05 04:32:42 +08:00
+								async def _rag_retrieve_core(state: MainGraphState, rag_tool: callable) -> MainGraphState:
-												修复循环推理bug

											
										
										
											2026-05-05 00:54:04 +08:00
+								    """执行 RAG 检索的核心逻辑"""
-												refactor: 将 RAG 节点拆分为独立模块

- 新增 rag_nodes.py: 独立的 RAG 检索节点
- 从 react_nodes.py 移除 RAG 相关代码
- 更新导入和导出
- rag_nodes.py 包含 rag_retrieve_node 和 rag_re_retrieve_node
- 添加 inject_rag_tool_to_state 工具函数

											
										
										
											2026-04-26 11:23:12 +08:00
+								    retrieval_query = state.user_query
-												修复循环推理bug

											
										
										
											2026-05-05 00:54:04 +08:00
 								    # 优先使用推理结果中的优化查询
 								    reasoning_result = state.debug_info.get("reasoning_result")
 								    if reasoning_result and hasattr(reasoning_result, "retrieval_config"):
 								        cfg = reasoning_result.retrieval_config
 								        if cfg and cfg.retrieval_query:
 								            retrieval_query = cfg.retrieval_query
-												refactor: 重构快速路径流程，统一通过 llm_call 输出

- 重构 fast_paths.py，让 fast_chitchat 和 fast_rag 都进入 llm_call 而不是直接设置 final_result
- 修改 check_fast_path_success 函数返回 'llm_call' 而不是 'success'
- 更新 main_graph_builder.py 的条件边配置，支持路由到 llm_call
- 在快速路径节点中添加清除 state.final_result 的逻辑，避免复用旧结果
- 重构 RAG 工具初始化方式，使用模块级变量管理
- 修改 finalize.py 让它返回 final_result
- 更新 agent_service.py 的 RAG 工具注入方式
- 简化 hybrid_router.py 的代码结构
- 清理 rag_nodes.py 的全局变量相关代码
- 更新相关测试文件

											
										
										
											2026-05-05 04:32:42 +08:00
+								    # 调用 RAG 工具
 								    rag_context = await rag_tool.ainvoke(retrieval_query)
 								    info(f"[RAG Core] 获取到 rag_context: {type(rag_context)}, 长度={len(rag_context) if rag_context else 0}")
-												修复循环推理bug

											
										
										
											2026-05-05 00:54:04 +08:00
-												添加rag置信度判断

											
										
										
											2026-05-06 01:15:52 +08:00
+								    # 更新状态
-												refactor: 重构快速路径流程，统一通过 llm_call 输出

- 重构 fast_paths.py，让 fast_chitchat 和 fast_rag 都进入 llm_call 而不是直接设置 final_result
- 修改 check_fast_path_success 函数返回 'llm_call' 而不是 'success'
- 更新 main_graph_builder.py 的条件边配置，支持路由到 llm_call
- 在快速路径节点中添加清除 state.final_result 的逻辑，避免复用旧结果
- 重构 RAG 工具初始化方式，使用模块级变量管理
- 修改 finalize.py 让它返回 final_result
- 更新 agent_service.py 的 RAG 工具注入方式
- 简化 hybrid_router.py 的代码结构
- 清理 rag_nodes.py 的全局变量相关代码
- 更新相关测试文件

											
										
										
											2026-05-05 04:32:42 +08:00
+								    state.rag_context = rag_context
 								    state.rag_retrieved = True
-												添加rag置信度判断

											
										
										
											2026-05-06 01:15:52 +08:00
+								    state.rag_attempts = getattr(state, 'rag_attempts', 0) + 1
-												refactor: 重构快速路径流程，统一通过 llm_call 输出

- 重构 fast_paths.py，让 fast_chitchat 和 fast_rag 都进入 llm_call 而不是直接设置 final_result
- 修改 check_fast_path_success 函数返回 'llm_call' 而不是 'success'
- 更新 main_graph_builder.py 的条件边配置，支持路由到 llm_call
- 在快速路径节点中添加清除 state.final_result 的逻辑，避免复用旧结果
- 重构 RAG 工具初始化方式，使用模块级变量管理
- 修改 finalize.py 让它返回 final_result
- 更新 agent_service.py 的 RAG 工具注入方式
- 简化 hybrid_router.py 的代码结构
- 清理 rag_nodes.py 的全局变量相关代码
- 更新相关测试文件

											
										
										
											2026-05-05 04:32:42 +08:00
+								    state.debug_info["rag_source"] = "tool"
-												修复循环推理bug

											
										
										
											2026-05-05 00:54:04 +08:00
-												refactor: 重构快速路径流程，统一通过 llm_call 输出

- 重构 fast_paths.py，让 fast_chitchat 和 fast_rag 都进入 llm_call 而不是直接设置 final_result
- 修改 check_fast_path_success 函数返回 'llm_call' 而不是 'success'
- 更新 main_graph_builder.py 的条件边配置，支持路由到 llm_call
- 在快速路径节点中添加清除 state.final_result 的逻辑，避免复用旧结果
- 重构 RAG 工具初始化方式，使用模块级变量管理
- 修改 finalize.py 让它返回 final_result
- 更新 agent_service.py 的 RAG 工具注入方式
- 简化 hybrid_router.py 的代码结构
- 清理 rag_nodes.py 的全局变量相关代码
- 更新相关测试文件

											
										
										
											2026-05-05 04:32:42 +08:00
+								    return state
-												refactor: 真正利用已有 RAG 代码重构 rag_nodes.py

- 真正导入和使用 backend/app/rag/tools.py
- 添加全局 RAG 工具管理（get/set_global_rag_tool）
- 集成 RAGPipeline，支持多路查询和重排序
- 兼容 rag_initializer.py 的初始化方式
- 移除模拟实现，使用真正的 RAG 功能

											
										
										
											2026-04-26 11:25:01 +08:00
-												修复循环推理bug

											
										
										
											2026-05-05 00:54:04 +08:00
 								# ========== RAG 检索节点 ==========
-												导入方式修改

											
										
										
											2026-05-05 23:17:00 +08:00
+								async def rag_retrieve_node(state: MainGraphState, config: Optional[RunnableConfig] = None) -> MainGraphState:
-												添加rag置信度判断

											
										
										
											2026-05-06 01:15:52 +08:00
+								    """RAG 检索节点：检索 + 置信度评估"""
-												refactor: 将 RAG 节点拆分为独立模块

- 新增 rag_nodes.py: 独立的 RAG 检索节点
- 从 react_nodes.py 移除 RAG 相关代码
- 更新导入和导出
- rag_nodes.py 包含 rag_retrieve_node 和 rag_re_retrieve_node
- 添加 inject_rag_tool_to_state 工具函数

											
										
										
											2026-04-26 11:23:12 +08:00
+								    state.current_phase = "rag_retrieving"
 								    start_time = time.time()
-												修复循环推理bug

											
										
										
											2026-05-05 00:54:04 +08:00
-												refactor: 重构快速路径流程，统一通过 llm_call 输出

- 重构 fast_paths.py，让 fast_chitchat 和 fast_rag 都进入 llm_call 而不是直接设置 final_result
- 修改 check_fast_path_success 函数返回 'llm_call' 而不是 'success'
- 更新 main_graph_builder.py 的条件边配置，支持路由到 llm_call
- 在快速路径节点中添加清除 state.final_result 的逻辑，避免复用旧结果
- 重构 RAG 工具初始化方式，使用模块级变量管理
- 修改 finalize.py 让它返回 final_result
- 更新 agent_service.py 的 RAG 工具注入方式
- 简化 hybrid_router.py 的代码结构
- 清理 rag_nodes.py 的全局变量相关代码
- 更新相关测试文件

											
										
										
											2026-05-05 04:32:42 +08:00
+								    rag_tool = _get_rag_tool()
 								    if not rag_tool:
-												添加rag置信度判断

											
										
										
											2026-05-06 01:15:52 +08:00
+								        info("[RAG] RAG 工具未初始化")
 								        state.rag_confidence = 0.0
 								        state.rag_retrieved = False
-												refactor: 重构快速路径流程，统一通过 llm_call 输出

- 重构 fast_paths.py，让 fast_chitchat 和 fast_rag 都进入 llm_call 而不是直接设置 final_result
- 修改 check_fast_path_success 函数返回 'llm_call' 而不是 'success'
- 更新 main_graph_builder.py 的条件边配置，支持路由到 llm_call
- 在快速路径节点中添加清除 state.final_result 的逻辑，避免复用旧结果
- 重构 RAG 工具初始化方式，使用模块级变量管理
- 修改 finalize.py 让它返回 final_result
- 更新 agent_service.py 的 RAG 工具注入方式
- 简化 hybrid_router.py 的代码结构
- 清理 rag_nodes.py 的全局变量相关代码
- 更新相关测试文件

											
										
										
											2026-05-05 04:32:42 +08:00
+								        return state
-												修复循环推理bug

											
										
										
											2026-05-05 00:54:04 +08:00
+								    await dispatch_custom_event(
 								        "react_reasoning",
 								        make_react_event(state.reasoning_step, "rag_retrieve_start", 1.0, "开始执行 RAG 检索..."),
 								        config
 								    )
-												添加rag置信度判断

											
										
										
											2026-05-06 01:15:52 +08:00
+								    try:
 								        state = await _rag_retrieve_core(state, rag_tool)
-												修复循环推理bug

											
										
										
											2026-05-05 00:54:04 +08:00
-												添加rag置信度判断

											
										
										
											2026-05-06 01:15:52 +08:00
+								        # 评估置信度
 								        confidence = await _evaluate_rag_confidence(state)
 								        state.rag_confidence = confidence
-												修复循环推理bug

											
										
										
											2026-05-05 00:54:04 +08:00
-												添加rag置信度判断

											
										
										
											2026-05-06 01:15:52 +08:00
+								        info(f"[RAG] 检索完成，置信度={confidence:.2f}，RAG尝试次数={state.rag_attempts}")
 								        state.reasoning_history.append({
 								            "step": state.reasoning_step,
 								            "action": "RETRIEVE_RAG",
 								            "confidence": confidence,
 								            "reasoning": f"RAG 检索完成，置信度={confidence:.2f}",
 								            "timestamp": datetime.now().isoformat()
 								        })
 								        await dispatch_custom_event(
 								            "react_reasoning",
 								            make_react_event(state.reasoning_step, "rag_retrieve_complete", confidence,
 								                           f"RAG 检索完成，置信度={confidence:.2f}"),
 								            config
 								        )
 								    except Exception as e:
 								        info(f"[RAG] 检索失败: {e}")
 								        state.rag_confidence = 0.0
 								        state.rag_retrieved = False
-												修复循环推理bug

											
										
										
											2026-05-05 00:54:04 +08:00
-												refactor: 将 RAG 节点拆分为独立模块

- 新增 rag_nodes.py: 独立的 RAG 检索节点
- 从 react_nodes.py 移除 RAG 相关代码
- 更新导入和导出
- rag_nodes.py 包含 rag_retrieve_node 和 rag_re_retrieve_node
- 添加 inject_rag_tool_to_state 工具函数

											
										
										
											2026-04-26 11:23:12 +08:00
+								    return state
-												添加rag置信度判断

											
										
										
											2026-05-06 01:15:52 +08:00
+								async def _evaluate_rag_confidence(state: MainGraphState) -> float:
 								    """评估 RAG 检索结果置信度（综合向量相似度 + 重排分数 + 小模型判断）"""
 								    query = state.user_query or ""
 								    rag_context = state.rag_context or ""
 								    if not rag_context:
 								        return 0.0
 								    # 方式1: 向量相似度（从 rag_docs 中获取）
 								    embedding_score = _get_embedding_similarity(state, query)
 								    info(f"[RAG Confidence] 向量相似度={embedding_score:.3f}")
 								    # 方式2: 重排序分数（从 rag_docs 中获取）
 								    rerank_score = _get_rerank_score(state)
 								    info(f"[RAG Confidence] 重排分数={rerank_score:.3f}")
 								    # 方式3: 小模型判断
 								    llm_score = await _get_llm_score(state)
 								    info(f"[RAG Confidence] LLM评估={llm_score:.3f}")
 								    # 综合得分（加权平均）
 								    # 向量相似度权重 0.3，重排权重 0.3，LLM 权重 0.4
 								    final_score = embedding_score * 0.3 + rerank_score * 0.3 + llm_score * 0.4
 								    info(f"[RAG Confidence] 综合置信度={final_score:.3f} (embedding={embedding_score:.3f}*0.3 + rerank={rerank_score:.3f}*0.3 + llm={llm_score:.3f}*0.4)")
 								    return final_score
 								def _get_embedding_similarity(state: MainGraphState) -> float:
 								    """从 rag_docs 中获取向量相似度分数"""
 								    rag_docs = getattr(state, "rag_docs", [])
 								    # 如果有多个文档，取最高分
 								    scores = []
 								    for doc in rag_docs:
 								        if isinstance(doc, dict):
 								            score = doc.get("score", 0.0)
 								            # 向量相似度通常在 0-1 之间，RRF 分数可能更高
 								            # 归一化到 0-1
 								            if score > 1.0:
 								                score = min(score / 10.0, 1.0)  # 假设 max 约 10
 								            scores.append(score)
 								        elif hasattr(doc, "metadata"):
 								            score = doc.metadata.get("score", 0.0)
 								            if score > 1.0:
 								                score = min(score / 10.0, 1.0)
 								            scores.append(score)
 								    if scores:
 								        # 取平均或最高分
 								        return max(scores)  # 使用最高分更准确
 								    return 0.0
 								def _get_rerank_score(state: MainGraphState) -> float:
 								    """从 rag_docs 中获取重排序分数"""
 								    rag_docs = getattr(state, "rag_docs", [])
 								    # 重排分数通常在 0-1 之间
 								    scores = []
 								    for doc in rag_docs:
 								        if isinstance(doc, dict):
 								            score = doc.get("rerank_score", 0.0)
 								        elif hasattr(doc, "metadata"):
 								            score = doc.metadata.get("rerank_score", 0.0)
 								        else:
 								            score = 0.0
 								        if score > 0:
 								            scores.append(score)
 								    if scores:
 								        return max(scores)  # 使用最高分
 								    return 0.0
 								async def _get_llm_score(state: MainGraphState) -> float:
 								    """使用小模型评估检索结果相关性"""
 								    query = state.user_query or ""
 								    rag_context = state.rag_context or ""
 								    try:
 								        llm = get_small_llm_service()
 								        prompt = f"""评估以下检索结果与用户问题的相关性，返回 0.0-1.0 的分数：
 								- 1.0 = 完全相关，能直接回答问题
 								- 0.5 = 部分相关，有一定参考价值
 								- 0.0 = 完全不相关，无法回答问题
 								用户问题：{query}
 								检索结果：{rag_context[:1500]}
 								只返回一个数字："""
-												修复循环推理bug

											
										
										
											2026-05-05 00:54:04 +08:00
-												添加rag置信度判断

											
										
										
											2026-05-06 01:15:52 +08:00
+								        response = await llm.ainvoke(prompt)
 								        content = response.content.strip()
-												修复循环推理bug

											
										
										
											2026-05-05 00:54:04 +08:00
-												添加rag置信度判断

											
										
										
											2026-05-06 01:15:52 +08:00
+								        import re
 								        match = re.search(r'(\d+\.?\d*)', content)
 								        if match:
 								            score = float(match.group(1))
 								            return max(0.0, min(1.0, score))
 								    except Exception as e:
 								        info(f"[RAG Confidence] LLM评估失败: {e}")
 								    return 0.5  # 默认中等置信度
 								# ========== 置信度判断节点 ==========
 								def check_rag_confidence(state: MainGraphState) -> str:
 								    """
 								    根据 RAG 置信度判断下一步
 								    Returns:
 								        "high_confidence" - 高置信度(>=0.6)，可直接生成回答
 								        "low_confidence" - 低置信度(<0.6)，需要联网搜索
 								        "no_rag" - 无检索结果，需要联网搜索
 								    """
 								    rag_attempts = getattr(state, 'rag_attempts', 0)
 								    rag_confidence = getattr(state, 'rag_confidence', 0.0)
 								    info(f"[Confidence Check] rag_attempts={rag_attempts}, rag_confidence={rag_confidence:.2f}")
 								    # 情况1: 没有检索结果
 								    if not getattr(state, 'rag_retrieved', False) or not state.rag_context:
 								        info("[Confidence Check] 无检索结果，走联网")
 								        return "no_rag"
 								    # 情况2: 置信度低于阈值
 								    if rag_confidence < RAG_CONFIDENCE_THRESHOLD:
 								        if rag_attempts >= 2:
 								            info(f"[Confidence Check] 置信度={rag_confidence:.2f}<{RAG_CONFIDENCE_THRESHOLD}，且RAG尝试{rag_attempts}次，走联网")
 								            return "low_confidence"
 								        else:
 								            info(f"[Confidence Check] 置信度={rag_confidence:.2f}<{RAG_CONFIDENCE_THRESHOLD}，可再尝试RAG一次")
 								            return "retry_rag"
 								    # 情况3: 高置信度
 								    info(f"[Confidence Check] 高置信度={rag_confidence:.2f}>={RAG_CONFIDENCE_THRESHOLD}，直接生成回答")
 								    return "high_confidence"
-												refactor: 将 RAG 节点拆分为独立模块

- 新增 rag_nodes.py: 独立的 RAG 检索节点
- 从 react_nodes.py 移除 RAG 相关代码
- 更新导入和导出
- rag_nodes.py 包含 rag_retrieve_node 和 rag_re_retrieve_node
- 添加 inject_rag_tool_to_state 工具函数

											
										
										
											2026-04-26 11:23:12 +08:00
 								# ========== 导出 ==========
 								__all__ = [
 								    "rag_retrieve_node",
-												添加rag置信度判断

											
										
										
											2026-05-06 01:15:52 +08:00
+								    "check_rag_confidence",
 								    "RAG_CONFIDENCE_THRESHOLD",
-												refactor: 将 RAG 节点拆分为独立模块

- 新增 rag_nodes.py: 独立的 RAG 检索节点
- 从 react_nodes.py 移除 RAG 相关代码
- 更新导入和导出
- rag_nodes.py 包含 rag_retrieve_node 和 rag_re_retrieve_node
- 添加 inject_rag_tool_to_state 工具函数

											
										
										
											2026-04-26 11:23:12 +08:00
+								]