This commit is contained in:
@@ -81,11 +81,17 @@ class RAGPipeline:
|
||||
return await self.retriever.ainvoke(query)
|
||||
|
||||
async def _get_parents(self, child_docs: List[Document]) -> List[Document]:
|
||||
parent_map = {}
|
||||
# 收集 parent_id 和对应的分数
|
||||
parent_map = {} # parent_id -> (embedding_score, rerank_score)
|
||||
|
||||
for doc in child_docs:
|
||||
pid = doc.metadata.get("parent_id")
|
||||
if pid and pid not in parent_map:
|
||||
parent_map[pid] = doc.metadata.get("score", 0.0)
|
||||
# embedding 分数
|
||||
embedding_score = doc.metadata.get("score", 0.0)
|
||||
# rerank 分数(如果有的话)
|
||||
rerank_score = doc.metadata.get("rerank_score", 0.0)
|
||||
parent_map[pid] = (embedding_score, rerank_score)
|
||||
|
||||
if not parent_map:
|
||||
logger.warning("[Pipeline] 未找到 parent_id,返回子文档")
|
||||
@@ -94,10 +100,19 @@ class RAGPipeline:
|
||||
try:
|
||||
from backend.rag_core import create_docstore
|
||||
docstore, _ = create_docstore()
|
||||
# 同步获取(异步版本不存在)
|
||||
parent_docs = docstore.mget(list(parent_map.keys()))
|
||||
parent_map2 = {d.metadata.get("id"): d for d in parent_docs if d}
|
||||
result = [(parent_map2[pid], score) for pid, score in parent_map.items() if pid in parent_map2]
|
||||
|
||||
# 构建结果,保持分数信息
|
||||
result = []
|
||||
for doc in parent_docs:
|
||||
if doc:
|
||||
pid = doc.metadata.get("id")
|
||||
scores = parent_map.get(pid, (0.0, 0.0))
|
||||
# 将分数添加到 metadata 中
|
||||
doc.metadata["embedding_score"] = scores[0]
|
||||
doc.metadata["rerank_score"] = scores[1]
|
||||
result.append((doc, scores[0] + scores[1] * 2)) # 综合分数,rerank 权重更高
|
||||
|
||||
result.sort(key=lambda x: x[1], reverse=True)
|
||||
docs = [d for d, _ in result]
|
||||
logger.info(f"[Pipeline] 获取到 {len(docs)} 个父文档")
|
||||
|
||||
Reference in New Issue
Block a user