# ailine/backend/app/main_graph/nodes/rag_nodes.py

"""
RAG 检索节点模块
包含：RAG 检索、置信度判断、重检索等节点
"""

import time
import asyncio
from typing import Optional
from datetime import datetime
from langchain_core.runnables.config import RunnableConfig

from ...main_graph.state import MainGraphState, ErrorRecord, ErrorSeverity
from ...main_graph.utils.retry_utils import RAG_RETRY_CONFIG
from backend.app.logger import info, debug
from ...model_services import get_small_llm_service
from ._utils import dispatch_custom_event, make_react_event


# Confidence threshold configuration
RAG_CONFIDENCE_THRESHOLD = 0.6  # retrieval scoring below this value is treated as not relevant


def _get_rag_tool() -> Optional[callable]:
    """Return whatever ``rag_initializer.get_rag_tool()`` currently provides.

    Callers in this module treat a falsy return value as "RAG tool not
    initialised" and otherwise await ``.ainvoke(query)`` on the result.
    """
    # The import is resolved at call time rather than at module import time
    # (presumably to avoid an import cycle or early initialisation - confirm).
    from backend.app.main_graph.utils.rag_initializer import get_rag_tool as _load_tool

    tool = _load_tool()
    return tool


# ========== RAG 检索核心逻辑 ==========
async def _rag_retrieve_core(state: MainGraphState, rag_tool: callable) -> MainGraphState:
    """Run one retrieval pass against the RAG tool and record the outcome on ``state``.

    The query sent to the tool is ``state.user_query`` unless
    ``state.debug_info["reasoning_result"]`` carries a truthy
    ``retrieval_config`` whose ``retrieval_query`` is truthy, in which case
    that rewritten query is used instead.

    Args:
        state: Graph state; mutated in place.
        rag_tool: Object exposing an awaitable ``ainvoke(query)``.

    Returns:
        The same ``state`` object, with ``rag_context`` set to the tool's
        result, ``rag_retrieved`` set to True, ``rag_attempts`` incremented
        and ``debug_info["rag_source"]`` set to ``"tool"``.

    Exceptions raised by ``rag_tool.ainvoke`` are not handled here.
    """
    retrieval_query = state.user_query

    # Prefer the optimised query from the reasoning result when one exists.
    reasoning_result = state.debug_info.get("reasoning_result")
    cfg = (
        reasoning_result.retrieval_config
        if reasoning_result and hasattr(reasoning_result, "retrieval_config")
        else None
    )
    if cfg and cfg.retrieval_query:
        retrieval_query = cfg.retrieval_query

    # Query the RAG tool.
    rag_context = await rag_tool.ainvoke(retrieval_query)
    info(f"[RAG Core] 获取到 rag_context: {type(rag_context)}, 长度={len(rag_context) if rag_context else 0}")

    # Record the result on the state; the attempt counter starts at 0 when
    # the attribute is absent.
    previous_attempts = getattr(state, 'rag_attempts', 0)
    state.rag_context = rag_context
    state.rag_retrieved = True
    state.rag_attempts = previous_attempts + 1
    state.debug_info["rag_source"] = "tool"

    return state


# ========== RAG 检索节点 ==========
async def rag_retrieve_node(state: MainGraphState, config: Optional[RunnableConfig] = None) -> MainGraphState:
    """RAG retrieval node: retrieve context, then score its confidence.

    Steps:
        1. Mark ``state.current_phase`` as ``"rag_retrieving"``.
        2. If no RAG tool is available, set ``rag_confidence`` to 0.0 and
           ``rag_retrieved`` to False and return immediately.
        3. Emit a ``rag_retrieve_start`` event, run the retrieval, evaluate
           the confidence, append a ``RETRIEVE_RAG`` entry to
           ``state.reasoning_history`` and emit ``rag_retrieve_complete``.

    Any exception raised during step 3's retrieval, scoring or completion
    event is logged and converted into "no result" (``rag_confidence = 0.0``,
    ``rag_retrieved = False``) instead of propagating, so the node is
    best-effort from the graph's point of view.

    Args:
        state: Graph state; mutated in place.
        config: Optional runnable config forwarded to ``dispatch_custom_event``.

    Returns:
        The updated state.
    """
    state.current_phase = "rag_retrieving"

    rag_tool = _get_rag_tool()
    if not rag_tool:
        info("[RAG] RAG 工具未初始化")
        state.rag_confidence = 0.0
        state.rag_retrieved = False
        return state

    await dispatch_custom_event(
        "react_reasoning",
        make_react_event(state.reasoning_step, "rag_retrieve_start", 1.0, "开始执行 RAG 检索..."),
        config
    )

    try:
        state = await _rag_retrieve_core(state, rag_tool)

        # Score how relevant the retrieved context is to the question.
        confidence = await _evaluate_rag_confidence(state)
        state.rag_confidence = confidence

        info(f"[RAG] 检索完成，置信度={confidence:.2f}，RAG尝试次数={state.rag_attempts}")

        state.reasoning_history.append({
            "step": state.reasoning_step,
            "action": "RETRIEVE_RAG",
            "confidence": confidence,
            "reasoning": f"RAG 检索完成，置信度={confidence:.2f}",
            "timestamp": datetime.now().isoformat()
        })

        await dispatch_custom_event(
            "react_reasoning",
            make_react_event(state.reasoning_step, "rag_retrieve_complete", confidence,
                           f"RAG 检索完成，置信度={confidence:.2f}"),
            config
        )

    except Exception as e:
        # This log line is the only trace of the failure, so include the
        # exception type: some exceptions (e.g. timeouts) have an empty str().
        info(f"[RAG] 检索失败: {type(e).__name__}: {e}")
        state.rag_confidence = 0.0
        state.rag_retrieved = False

    return state


async def _evaluate_rag_confidence(state: MainGraphState) -> float:
    """Score the retrieved context by combining three signals.

    The result is a weighted sum of:
        * the embedding similarity read from ``state.rag_docs`` (weight 0.3),
        * the rerank score read from ``state.rag_docs`` (weight 0.3),
        * a relevance score produced by the small LLM (weight 0.4).

    Args:
        state: Graph state holding ``rag_context`` and, optionally, ``rag_docs``.

    Returns:
        0.0 when ``state.rag_context`` is empty, otherwise the weighted sum.
    """
    if not (state.rag_context or ""):
        return 0.0

    # Signal 1: vector similarity (taken from rag_docs).
    # The helper takes the state only; passing the query as a second
    # positional argument raised TypeError on every evaluation.
    embedding_score = _get_embedding_similarity(state)
    info(f"[RAG Confidence] 向量相似度={embedding_score:.3f}")

    # Signal 2: rerank score (taken from rag_docs).
    rerank_score = _get_rerank_score(state)
    info(f"[RAG Confidence] 重排分数={rerank_score:.3f}")

    # Signal 3: small-model judgement.
    llm_score = await _get_llm_score(state)
    info(f"[RAG Confidence] LLM评估={llm_score:.3f}")

    # Weighted combination: embedding 0.3, rerank 0.3, LLM 0.4.
    # NOTE(review): when rag_docs carries no scores both doc-based signals are
    # 0.0, which caps the result at 0.4 * llm_score. Nothing visible in this
    # module assigns state.rag_docs - confirm it is populated elsewhere.
    final_score = embedding_score * 0.3 + rerank_score * 0.3 + llm_score * 0.4
    info(f"[RAG Confidence] 综合置信度={final_score:.3f} (embedding={embedding_score:.3f}*0.3 + rerank={rerank_score:.3f}*0.3 + llm={llm_score:.3f}*0.4)")

    return final_score


def _get_embedding_similarity(state: "MainGraphState", query: Optional[str] = None) -> float:
    """Return the best embedding-similarity score found in ``state.rag_docs``.

    Each document may be a dict with a ``"score"`` key or an object whose
    ``metadata`` mapping has one. Scores above 1.0 (e.g. fused/RRF-style
    scores) are scaled by 1/10 and capped at 1.0, assuming a maximum of
    roughly 10.

    Args:
        state: Graph state; ``rag_docs`` may be missing, None or empty.
        query: Accepted so that call sites passing the query as a second
            argument keep working; not used in the computation.

    Returns:
        The highest normalised score in [0.0, 1.0], or 0.0 when no usable
        score is present.
    """
    rag_docs = getattr(state, "rag_docs", None) or []

    best = 0.0
    for doc in rag_docs:
        if isinstance(doc, dict):
            raw = doc.get("score")
        elif hasattr(doc, "metadata"):
            raw = (doc.metadata or {}).get("score")
        else:
            continue

        if raw is None:
            continue
        try:
            # float() also accepts numeric types that are not float subclasses.
            score = float(raw)
        except (TypeError, ValueError):
            continue

        # Similarities normally lie in 0-1; larger values are rescaled.
        if score > 1.0:
            score = min(score / 10.0, 1.0)
        # Keep the maximum; starting from 0.0 also keeps negatives and NaN out.
        if score > best:
            best = score

    return best


def _get_rerank_score(state: "MainGraphState") -> float:
    """Return the best rerank score found in ``state.rag_docs``.

    Each document may be a dict with a ``"rerank_score"`` key or an object
    whose ``metadata`` mapping has one. Only positive scores count. Missing,
    ``None`` or non-numeric values are ignored instead of raising, and the
    result is capped at 1.0 so it stays on the same 0-1 scale as the other
    confidence components.

    Args:
        state: Graph state; ``rag_docs`` may be missing, None or empty.

    Returns:
        The highest positive score, capped at 1.0, or 0.0 when none exists.
    """
    rag_docs = getattr(state, "rag_docs", None) or []

    best = 0.0
    for doc in rag_docs:
        if isinstance(doc, dict):
            raw = doc.get("rerank_score")
        elif hasattr(doc, "metadata"):
            raw = (doc.metadata or {}).get("rerank_score")
        else:
            continue

        if raw is None:
            continue
        try:
            # float() also accepts numeric types that are not float subclasses.
            score = float(raw)
        except (TypeError, ValueError):
            continue

        # Starting from 0.0 means non-positive and NaN scores never win.
        if score > best:
            best = score

    return min(best, 1.0)


async def _get_llm_score(state: MainGraphState) -> float:
    """Ask the small LLM how relevant the retrieved context is to the question.

    The prompt contains ``state.user_query`` and the first 1500 characters of
    ``state.rag_context`` and asks for a single number between 0.0 and 1.0.
    The first number found in the reply is parsed and clamped to [0.0, 1.0].

    Args:
        state: Graph state providing ``user_query`` and ``rag_context``.

    Returns:
        The clamped score, or 0.5 (neutral default) when the call fails or
        the reply contains no number.
    """
    import re

    query = state.user_query or ""
    rag_context = state.rag_context or ""

    try:
        llm = get_small_llm_service()
        prompt = f"""评估以下检索结果与用户问题的相关性，返回 0.0-1.0 的分数：
- 1.0 = 完全相关，能直接回答问题
- 0.5 = 部分相关，有一定参考价值
- 0.0 = 完全不相关，无法回答问题

用户问题：{query}

检索结果：{rag_context[:1500]}

只返回一个数字："""

        response = await llm.ainvoke(prompt)
        # Use ``.content`` when the response has it; otherwise fall back to
        # the response itself (e.g. a plain string).
        content = getattr(response, "content", response)
        text = content if isinstance(content, str) else str(content)

        # Try "digits.digits" / ".digits" first so that a reply such as ".5"
        # is read as 0.5 rather than as the integer 5 (which clamps to 1.0).
        match = re.search(r'\d*\.\d+|\d+', text.strip())
        if match:
            score = float(match.group(0))
            return max(0.0, min(1.0, score))

    except Exception as e:
        info(f"[RAG Confidence] LLM评估失败: {e}")

    return 0.5  # neutral default confidence


# ========== 置信度判断节点 ==========
def check_rag_confidence(state: MainGraphState) -> str:
    """
    Pick the next route from the RAG retrieval outcome stored on ``state``.

    Returns:
        "no_rag" - nothing usable was retrieved (``rag_retrieved`` is falsy
            or ``rag_context`` is empty)
        "high_confidence" - confidence is not below RAG_CONFIDENCE_THRESHOLD
        "retry_rag" - confidence is below the threshold and fewer than 2
            RAG attempts have been made
        "low_confidence" - confidence is below the threshold after 2 or
            more RAG attempts

    According to the log messages, "no_rag" and "low_confidence" lead to a
    web search and "high_confidence" to answer generation; the actual graph
    wiring lives outside this function.
    """
    rag_attempts = getattr(state, 'rag_attempts', 0)
    rag_confidence = getattr(state, 'rag_confidence', 0.0)

    info(f"[Confidence Check] rag_attempts={rag_attempts}, rag_confidence={rag_confidence:.2f}")

    # Case 1: nothing was retrieved.
    has_result = getattr(state, 'rag_retrieved', False) and state.rag_context
    if not has_result:
        info("[Confidence Check] 无检索结果，走联网")
        return "no_rag"

    # Case 2: confidence reaches the threshold.
    below_threshold = rag_confidence < RAG_CONFIDENCE_THRESHOLD
    if not below_threshold:
        info(f"[Confidence Check] 高置信度={rag_confidence:.2f}>={RAG_CONFIDENCE_THRESHOLD}，直接生成回答")
        return "high_confidence"

    # Case 3: low confidence and the retry budget is used up.
    if rag_attempts >= 2:
        info(f"[Confidence Check] 置信度={rag_confidence:.2f}<{RAG_CONFIDENCE_THRESHOLD}，且RAG尝试{rag_attempts}次，走联网")
        return "low_confidence"

    # Case 4: low confidence, but one more RAG attempt is still allowed.
    info(f"[Confidence Check] 置信度={rag_confidence:.2f}<{RAG_CONFIDENCE_THRESHOLD}，可再尝试RAG一次")
    return "retry_rag"


# ========== Exports ==========
__all__ = [
    "rag_retrieve_node",
    "check_rag_confidence",
    "RAG_CONFIDENCE_THRESHOLD",
]