refactor: 重构 rerank 架构,分离服务层和业务逻辑
Some checks failed
构建并部署 AI Agent 服务 / deploy (push) Has been cancelled

- rerank_services.py:纯服务层,只负责调用 rerank server
- rag/rerank.py:业务逻辑层,负责文档处理、排序、top_n
- 更新 pipeline.py 使用新架构
- 架构与 embedding_services.py 保持一致
This commit is contained in:
2026-04-26 11:57:42 +08:00
parent 55c910bbe0
commit f63c394fcd
3 changed files with 176 additions and 85 deletions

View File

@@ -7,6 +7,7 @@ from langchain_core.documents import Document
from langchain_core.language_models import BaseLanguageModel
from ..model_services import get_rerank_service
from .rerank import create_document_reranker
from .query_transform import MultiQueryGenerator
from .fusion import reciprocal_rank_fusion
@@ -38,7 +39,7 @@ class RAGPipeline:
# 初始化组件 - 使用统一的重排服务获取接口
self.query_generator = MultiQueryGenerator(llm=llm, num_queries=num_queries)
self.reranker = get_rerank_service()
self.reranker = create_document_reranker()
async def aretrieve(self, query: str) -> List[Document]:
"""

89
backend/app/rag/rerank.py Normal file
View File

@@ -0,0 +1,89 @@
"""
重排业务逻辑模块
本模块包含 RAG 相关的重排业务逻辑:文档处理、排序、top_n
使用 model_services/rerank_services.py 提供的纯服务层
"""
import logging
from typing import List
from langchain_core.documents import Document
from ..model_services import get_rerank_service
logger = logging.getLogger(__name__)
class DocumentReranker:
    """Business-logic layer that reorders documents by relevance to a query.

    Responsibilities:
    - extract the text content from each ``Document``
    - ask the rerank service for one score per document
    - sort the documents by score, highest first
    - return the ``top_n`` best documents
    """

    def __init__(self, rerank_service=None):
        """Initialise the reranker.

        Args:
            rerank_service: Object exposing ``compute_scores(query, contents)``.
                Optional; when ``None`` the service returned by
                ``get_rerank_service()`` is used.
        """
        # Compare against None explicitly so an injected service that happens
        # to be falsy is not silently replaced by the default one.
        self._rerank_service = (
            get_rerank_service() if rerank_service is None else rerank_service
        )

    def compress_documents(
        self,
        documents: List[Document],
        query: str,
        top_n: int = 5
    ) -> List[Document]:
        """Rerank ``documents`` against ``query`` and keep the best ``top_n``.

        Any failure while scoring or sorting is logged as a warning and the
        first ``top_n`` documents are returned in their original order, so a
        reranking problem never breaks the caller.

        Args:
            documents: Documents to rank.
            query: Query string the documents are scored against.
            top_n: Maximum number of documents to return. A value of zero or
                less yields an empty list.

        Returns:
            List[Document]: Documents ordered by descending score, at most
            ``top_n`` of them. Documents with equal scores keep their
            original relative order.
        """
        if not documents:
            return []
        # A negative top_n used as a slice bound would mean "all but the last
        # n", which is never what a caller asking for the top N wants. Bail
        # out before spending a service call. (None is still tolerated and
        # keeps its plain slice meaning of "no limit".)
        if top_n is not None and top_n <= 0:
            return []

        try:
            # 1. Extract the text of every document.
            doc_contents = [doc.page_content for doc in documents]

            # 2. Ask the service layer for the scores.
            scores = list(
                self._rerank_service.compute_scores(query, doc_contents)
            )

            # zip() would silently truncate to the shorter sequence and drop
            # documents, so treat a count mismatch as a reranking failure.
            if len(scores) != len(documents):
                raise ValueError(
                    f"rerank service returned {len(scores)} scores "
                    f"for {len(documents)} documents"
                )

            # 3. Sort by score, highest first (sorted() is stable, so ties
            #    keep the incoming order).
            ranked = sorted(
                zip(documents, scores),
                key=lambda pair: pair[1],
                reverse=True,
            )

            # 4. Keep the top_n documents.
            return [doc for doc, _ in ranked[:top_n]]
        except Exception as e:
            # Deliberate best-effort fallback: degrade to the unranked prefix.
            logger.warning("重排过程出错,返回原始前 %s 个结果: %s", top_n, e)
            return documents[:top_n]
def create_document_reranker(rerank_service=None) -> DocumentReranker:
    """Factory that builds a ``DocumentReranker``.

    Args:
        rerank_service: Optional rerank service, forwarded unchanged to the
            ``DocumentReranker`` constructor.

    Returns:
        DocumentReranker: A newly constructed reranker instance.
    """
    reranker = DocumentReranker(rerank_service)
    return reranker