重构:添加模型服务模块,支持嵌入和重排服务的自动降级
新增功能:
- 创建 app/model_services 模块,提供统一的模型服务获取接口
- 实现 BaseServiceProvider 基类和 FallbackServiceChain 降级链
- 实现 get_embedding_service():优先本地 llama.cpp,降级到智谱 API
- 实现 get_rerank_service():优先本地 llama.cpp,降级到智谱 API
- 支持单例管理,确保全局只有一个服务实例

修改内容:
- 更新 app/config.py,添加智谱 API 相关配置
- 修改 rag_core/vector_store.py:支持接受外部传入的 embeddings
- 修改 rag_core/retriever_factory.py:支持接受外部传入的 embeddings
- 修改 app/agent/rag_initializer.py:使用 get_embedding_service()
- 修改 app/rag/pipeline.py:使用 get_rerank_service()
- 修改 app/memory/mem0_client.py:智能判断可用服务配置 mem0
- 修改 rag_indexer/index_builder.py:支持使用新服务,保持向后兼容
- 修改 rag_indexer/config.py:添加智谱配置

环境变量:
- ZHIPUAI_API_KEY:智谱 API 密钥(必选)
- ZHIPU_EMBEDDING_MODEL:可选,默认 embedding-3
- ZHIPU_RERANK_MODEL:可选,默认 rerank-2
- ZHIPU_API_BASE:可选,默认 https://open.bigmodel.cn/api/paas/v4
This commit is contained in:
@@ -17,10 +17,11 @@ def create_parent_retriever(
|
||||
parent_chunk_overlap: int = 100,
|
||||
child_chunk_size: int = 200,
|
||||
child_chunk_overlap: int = 20,
|
||||
embeddings: Embeddings | None = None,
|
||||
) -> ParentDocumentRetriever:
|
||||
"""
|
||||
创建 ParentDocumentRetriever 实例。
|
||||
|
||||
|
||||
Args:
|
||||
collection_name: Qdrant 集合名称,默认 "rag_documents"
|
||||
parent_splitter: 父文档切分器,默认 None(使用默认参数创建)
|
||||
@@ -31,16 +32,18 @@ def create_parent_retriever(
|
||||
parent_chunk_overlap: 父文档块重叠大小,默认 100
|
||||
child_chunk_size: 子文档块大小,默认 200
|
||||
child_chunk_overlap: 子文档块重叠大小,默认 20
|
||||
|
||||
embeddings: 嵌入模型实例,默认 None(使用内部默认的 LlamaCppEmbedder)
|
||||
|
||||
Returns:
|
||||
ParentDocumentRetriever 实例
|
||||
"""
|
||||
# 嵌入模型
|
||||
embedder = LlamaCppEmbedder()
|
||||
embeddings = embedder.as_langchain_embeddings()
|
||||
|
||||
if embeddings is None:
|
||||
embedder = LlamaCppEmbedder()
|
||||
embeddings = embedder.as_langchain_embeddings()
|
||||
|
||||
# 向量存储(只读)
|
||||
vector_store = QdrantVectorStore(collection_name=collection_name)
|
||||
vector_store = QdrantVectorStore(collection_name=collection_name, embeddings=embeddings)
|
||||
|
||||
# 切分器(若未提供则创建默认)
|
||||
if parent_splitter is None:
|
||||
|
||||
@@ -8,6 +8,7 @@ import time
|
||||
from typing import List, Optional, Dict, Any
|
||||
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.embeddings import Embeddings
|
||||
from langchain_qdrant import QdrantVectorStore as LangchainQdrantVS
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.http.models import Distance, VectorParams
|
||||
@@ -23,18 +24,25 @@ logger = logging.getLogger(__name__)
|
||||
class QdrantVectorStore:
|
||||
"""Qdrant 向量数据库操作包装器。"""
|
||||
|
||||
def __init__(self, collection_name: str):
|
||||
def __init__(self, collection_name: str, embeddings: Optional[Embeddings] = None):
|
||||
"""
|
||||
Args:
|
||||
collection_name: Qdrant 集合名称。
|
||||
embeddings: 嵌入模型实例,默认 None(使用内部默认的 LlamaCppEmbedder)。
|
||||
"""
|
||||
self.collection_name = collection_name
|
||||
self._client: Optional[QdrantClient] = None
|
||||
self._connection_attempts = 0
|
||||
self._last_connection_time: Optional[float] = None
|
||||
|
||||
embedder = LlamaCppEmbedder()
|
||||
self.embeddings = embedder.as_langchain_embeddings()
|
||||
|
||||
# 嵌入模型
|
||||
if embeddings is None:
|
||||
embedder = LlamaCppEmbedder()
|
||||
self.embeddings = embedder.as_langchain_embeddings()
|
||||
self._embedder = embedder
|
||||
else:
|
||||
self.embeddings = embeddings
|
||||
self._embedder = None
|
||||
|
||||
self.create_collection()
|
||||
|
||||
@@ -90,8 +98,13 @@ class QdrantVectorStore:
|
||||
|
||||
def create_collection(self, force_recreate: bool = False):
|
||||
"""创建集合,设置合适的向量维度。"""
|
||||
embedder = LlamaCppEmbedder()
|
||||
vector_size = embedder.get_embedding_dimension()
|
||||
if self._embedder is not None:
|
||||
# 使用内部的 embedder 获取维度
|
||||
vector_size = self._embedder.get_embedding_dimension()
|
||||
else:
|
||||
# 使用外部传入的 embeddings,通过测试获取维度
|
||||
test_embedding = self.embeddings.embed_query("test")
|
||||
vector_size = len(test_embedding)
|
||||
|
||||
max_retries = 3
|
||||
base_delay = 2
|
||||
|
||||
Reference in New Issue
Block a user