Files
ailine/backend/app/rag/fusion.py
root 8b354b7ccc
Some checks failed
构建并部署 AI Agent 服务 / deploy (push) Failing after 47m14s
重构代码,统一config配置
2026-04-21 11:02:16 +08:00

36 lines
1.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# rag/fusion.py
from typing import List, Dict
from langchain_core.documents import Document
def reciprocal_rank_fusion(
    doc_lists: List[List[Document]],
    k: int = 60
) -> List[Document]:
    """Fuse several ranked retrieval result lists with Reciprocal Rank Fusion.

    Every appearance of a document at 1-based position ``rank`` in one of the
    input lists adds ``1 / (k + rank)`` to that document's score. Documents
    are then returned in descending order of their accumulated score.

    Two entries are treated as the same document when both their full
    ``page_content`` and their ``metadata["source"]`` (missing -> ``""``)
    match. The first instance encountered is the one returned.

    Args:
        doc_lists: Ranked result lists, typically one per query; each list is
            expected to be ordered best-first.
        k: RRF smoothing constant, commonly 60. Larger values flatten the
            difference between high and low ranks.

    Returns:
        The deduplicated documents sorted by RRF score, highest first.
        Documents with equal scores keep the order in which they were first
        seen.
    """
    # Key on the *full* content plus the source. Keying on a truncated prefix
    # would merge distinct chunks of one file that merely start with the same
    # text, and a tuple (unlike a "_"-joined string) cannot confuse where the
    # content ends and the source begins.
    scores: Dict[tuple, float] = {}
    first_seen: Dict[tuple, Document] = {}

    for docs in doc_lists:
        for rank, doc in enumerate(docs, start=1):
            # str() keeps non-string sources (e.g. path objects) usable as
            # part of a hashable key.
            key = (doc.page_content, str(doc.metadata.get("source", "")))
            first_seen.setdefault(key, doc)
            scores[key] = scores.get(key, 0.0) + 1.0 / (k + rank)

    # sorted() is stable (also with reverse=True) and dicts preserve insertion
    # order, so ties stay in first-seen order.
    ranked_keys = sorted(scores, key=scores.get, reverse=True)
    return [first_seen[key] for key in ranked_keys]