2026-04-24 22:52:36 +08:00
|
|
|
|
"""
|
|
|
|
|
|
重排模型服务模块
|
|
|
|
|
|
|
|
|
|
|
|
本模块提供统一的重排模型服务获取接口,支持自动降级:
|
|
|
|
|
|
1. 优先使用本地 llama.cpp 重排服务
|
|
|
|
|
|
2. 本地服务不可用时,自动降级到智谱 API 重排服务
|
|
|
|
|
|
|
|
|
|
|
|
主要功能:
|
|
|
|
|
|
- LocalLlamaCppRerankProvider:本地 llama.cpp 重排服务提供者
|
|
|
|
|
|
- ZhipuRerankProvider:智谱 API 重排服务提供者
|
|
|
|
|
|
- get_rerank_service():获取重排服务的统一接口
|
2026-04-26 11:57:42 +08:00
|
|
|
|
|
|
|
|
|
|
注意:本模块只负责调用 rerank server,不包含业务逻辑(文档处理、排序、top_n)
|
|
|
|
|
|
业务逻辑放在 backend/app/rag/ 目录下
|
2026-04-24 22:52:36 +08:00
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
import logging
|
|
|
|
|
|
from typing import List
|
2026-04-26 11:57:42 +08:00
|
|
|
|
import httpx
|
2026-04-24 22:52:36 +08:00
|
|
|
|
|
|
|
|
|
|
from .base import (
|
|
|
|
|
|
BaseServiceProvider,
|
|
|
|
|
|
FallbackServiceChain,
|
|
|
|
|
|
SingletonServiceManager
|
|
|
|
|
|
)
|
|
|
|
|
|
from ..config import (
|
|
|
|
|
|
LLAMACPP_RERANKER_URL,
|
|
|
|
|
|
LLAMACPP_API_KEY,
|
|
|
|
|
|
ZHIPUAI_API_KEY,
|
|
|
|
|
|
ZHIPU_RERANK_MODEL,
|
|
|
|
|
|
ZHIPU_API_BASE
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-26 11:57:42 +08:00
|
|
|
|
class BaseRerankService:
    """Base class for rerank services (pure service layer).

    Implementations are only responsible for calling a rerank server.
    Business logic such as document processing, sorting and top_n selection
    is intentionally kept out of this layer (it lives under the rag/
    directory).
    """

    def compute_scores(self, query: str, documents: List[str]) -> List[float]:
        """Compute a relevance score for every document against the query.

        Args:
            query: The query string.
            documents: The document strings to score.

        Returns:
            List[float]: One relevance score per document.

        Raises:
            NotImplementedError: Always; subclasses must override this method.
        """
        raise NotImplementedError()
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-26 11:57:42 +08:00
|
|
|
|
class LocalLlamaCppRerankService(BaseRerankService):
    """Rerank service backed by a local llama.cpp server (pure service layer)."""

    def __init__(
        self,
        base_url: str,
        api_key: str,
        model: str = "bge-reranker-v2-m3",
        timeout: float = 120,
    ):
        """Store the connection settings; no request is made here.

        Args:
            base_url: Base URL of the rerank server, with or without a
                trailing "/v1".
            api_key: Bearer token; when falsy no Authorization header is sent.
            model: Model name placed in the request payload.
            timeout: Timeout handed to ``httpx.Client`` for each request.
                Defaults to the previously hard-coded 120.
        """
        self.base_url = base_url
        self.api_key = api_key
        self.model = model
        self.timeout = timeout

    def compute_scores(self, query: str, documents: List[str]) -> List[float]:
        """Call the llama.cpp rerank API and return the scores (pure API call).

        Args:
            query: The query string.
            documents: The document strings to score.

        Returns:
            List[float]: The ``relevance_score`` of each returned result,
            ordered by the result's ``index`` field. An empty list when
            ``documents`` is empty (no request is sent in that case).

        Raises:
            httpx.HTTPStatusError: Raised by ``raise_for_status()`` on an
                error response.
            ValueError: If the response body is not a dict containing a
                "results" key.
        """
        if not documents:
            return []

        headers = {"Content-Type": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        # Accept both ".../v1" and bare host URLs; the endpoint is always
        # requested under the "/v1" prefix.
        base = self.base_url.rstrip("/")
        if not base.endswith("/v1"):
            base = base + "/v1"

        payload = {
            "model": self.model,
            "query": query,
            "documents": documents,
        }

        with httpx.Client(timeout=self.timeout) as client:
            response = client.post(
                f"{base}/rerank",
                headers=headers,
                json=payload,
            )
            response.raise_for_status()
            data = response.json()

        if not (isinstance(data, dict) and "results" in data):
            raise ValueError(f"未知的 rerank API 响应格式: {data}")

        # Results may come back in any order; restore input order via "index".
        results_sorted = sorted(data["results"], key=lambda x: x["index"])
        return [item["relevance_score"] for item in results_sorted]
|
2026-04-24 22:52:36 +08:00
|
|
|
|
|
2026-04-26 11:57:42 +08:00
|
|
|
|
|
|
|
|
|
|
class ZhipuRerankService(BaseRerankService):
    """Rerank service backed by the Zhipu API (pure service layer)."""

    def __init__(self, model: str | None = None):
        """Remember the model name and API key.

        Args:
            model: Rerank model name; a falsy value selects
                ``ZHIPU_RERANK_MODEL`` from the configuration.
        """
        self.api_key = ZHIPUAI_API_KEY
        self.model = model if model else ZHIPU_RERANK_MODEL

    def compute_scores(self, query: str, documents: List[str]) -> List[float]:
        """Call the Zhipu rerank API and return the scores (pure API call).

        Args:
            query: The query string.
            documents: The document strings to score.

        Returns:
            List[float]: The ``relevance_score`` of each returned result,
            ordered by the result's ``index`` attribute. An empty list when
            ``documents`` is empty (no request is sent in that case).

        Raises:
            Exception: Any error raised while importing or calling the SDK
                is logged as a warning and re-raised unchanged.
        """
        if not documents:
            return []

        try:
            # Imported lazily so the module loads without the SDK installed.
            from zhipuai import ZhipuAI

            zhipu_client = ZhipuAI(api_key=self.api_key)
            reply = zhipu_client.rerank.create(
                model=self.model,
                query=query,
                documents=documents,
            )

            # Put the results back into input order before extracting scores.
            ordered = list(reply.results)
            ordered.sort(key=lambda entry: entry.index)
            return [entry.relevance_score for entry in ordered]
        except Exception as e:
            logger.warning(f"智谱 rerank 调用失败: {e}")
            raise
|
2026-04-24 22:52:36 +08:00
|
|
|
|
|
|
|
|
|
|
|
2026-04-26 11:57:42 +08:00
|
|
|
|
class LocalLlamaCppRerankProvider(BaseServiceProvider[BaseRerankService]):
    """Provider for the local llama.cpp rerank service."""

    def __init__(self, model: str = "bge-reranker-v2-m3"):
        """Register the provider under the name "local_llamacpp_rerank".

        Args:
            model: Model name passed to every service this provider creates.
        """
        super().__init__("local_llamacpp_rerank")
        self._model = model

    def _build_service(self) -> LocalLlamaCppRerankService:
        """Create a service from the configured URL, API key and model."""
        return LocalLlamaCppRerankService(
            base_url=LLAMACPP_RERANKER_URL,
            api_key=LLAMACPP_API_KEY,
            model=self._model,
        )

    def is_available(self) -> bool:
        """Check whether the local llama.cpp rerank service is usable.

        Returns:
            bool: False when ``LLAMACPP_RERANKER_URL`` is not configured or
            the probe request raises; True when the probe succeeds.
        """
        if not LLAMACPP_RERANKER_URL:
            logger.warning("LLAMACPP_RERANKER_URL 未配置")
            return False

        try:
            # Probe with a real one-document request. Only success matters,
            # so the returned scores are discarded.
            self._build_service().compute_scores("test query", ["test document"])
            logger.info("本地 llama.cpp 重排服务可用")
            return True
        except Exception as e:
            # Any failure counts as "unavailable"; it is logged, not raised.
            logger.warning(f"本地 llama.cpp 重排服务不可用: {e}")
            return False

    def get_service(self) -> BaseRerankService:
        """Return the local llama.cpp rerank service, creating it on first use.

        Returns:
            BaseRerankService: The instance stored in ``_service_instance``.
        """
        if self._service_instance is None:
            self._service_instance = self._build_service()
        return self._service_instance
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-26 11:57:42 +08:00
|
|
|
|
class ZhipuRerankProvider(BaseServiceProvider[BaseRerankService]):
    """Provider for the Zhipu API rerank service."""

    def __init__(self, model: str | None = None):
        """Register the provider under the name "zhipu_rerank".

        Args:
            model: Rerank model name; a falsy value selects
                ``ZHIPU_RERANK_MODEL`` from the configuration.
        """
        super().__init__("zhipu_rerank")
        self._model = model or ZHIPU_RERANK_MODEL

    def is_available(self) -> bool:
        """Check whether the Zhipu API rerank service is usable.

        Returns:
            bool: False when ``ZHIPUAI_API_KEY`` is not configured, the
            zhipuai package cannot be imported, or the probe request raises;
            True when the probe succeeds.
        """
        if not ZHIPUAI_API_KEY:
            logger.warning("ZHIPUAI_API_KEY 未配置")
            return False

        try:
            # Probe with a real one-document request. Only success matters,
            # so the returned scores are discarded.
            service = ZhipuRerankService(model=self._model)
            service.compute_scores("test query", ["test document"])
            logger.info("智谱重排服务可用")
            return True
        except ImportError:
            # The service imports the SDK lazily, so a missing package
            # surfaces here rather than at module import time.
            logger.warning("zhipuai 库未安装")
            return False
        except Exception as e:
            # Any other failure counts as "unavailable"; it is logged, not raised.
            logger.warning(f"智谱重排服务不可用: {e}")
            return False

    def get_service(self) -> BaseRerankService:
        """Return the Zhipu API rerank service, creating it on first use.

        Returns:
            BaseRerankService: The instance stored in ``_service_instance``.
        """
        if self._service_instance is None:
            self._service_instance = ZhipuRerankService(model=self._model)
        return self._service_instance
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-26 11:57:42 +08:00
|
|
|
|
def get_rerank_service() -> BaseRerankService:
    """Return a rerank service with automatic fallback (pure service layer).

    The provider chain is registered with ``SingletonServiceManager`` under
    the key "rerank_service_chain". It uses the local llama.cpp provider as
    primary and the Zhipu provider as the only fallback.

    Returns:
        BaseRerankService: Whatever the chain's ``get_available_service()``
        returns (presumably the first provider that reports itself
        available; confirm against ``FallbackServiceChain``).
    """

    def _build_chain():
        # Primary is constructed before the fallback, as two separate
        # provider instances.
        return FallbackServiceChain(
            LocalLlamaCppRerankProvider(),
            [ZhipuRerankProvider()],
        )

    service_chain = SingletonServiceManager.get_or_create(
        "rerank_service_chain", _build_chain
    )
    return service_chain.get_available_service()
|
2026-04-26 11:57:42 +08:00
|
|
|
|
|