- 完全兼容现有代码:默认 enable_sparse=False
- 启用时:需要先安装 fastembed,并设置 enable_sparse=True
- 启用后自动初始化 FastEmbedSparse 和 RetrievalMode.HYBRID
- 稀疏向量初始化失败时优雅回退到纯稠密检索
- 语法检查通过
This commit is contained in:
@@ -71,6 +71,9 @@ class IndexBuilderConfig:
|
||||
|
||||
# 其他切分器参数(当 splitter_type 非父子块时使用)
|
||||
extra_splitter_kwargs: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
# 混合检索支持(默认 False,完全兼容)
|
||||
enable_sparse: bool = False
|
||||
|
||||
# ---------- 索引构建器 ----------
|
||||
class IndexBuilder:
|
||||
@@ -116,10 +119,27 @@ class IndexBuilder:
|
||||
self.embeddings = self.embedder.as_langchain_embeddings()
|
||||
|
||||
# 初始化向量存储
|
||||
self.vector_store = QdrantVectorStore(
|
||||
collection_name=config.collection_name,
|
||||
embeddings=self.embeddings if self.embedder is None else None,
|
||||
)
|
||||
# 默认 enable_sparse=False,完全兼容现有代码
|
||||
# 若需要启用混合检索,请先安装 fastembed,然后设置 enable_sparse=True
|
||||
qdrant_kwargs = {
|
||||
"collection_name": config.collection_name,
|
||||
}
|
||||
|
||||
if self.config.enable_sparse:
|
||||
try:
|
||||
from langchain_qdrant import FastEmbedSparse, RetrievalMode
|
||||
qdrant_kwargs["sparse_embedding"] = FastEmbedSparse(model_name="Qdrant/bm25")
|
||||
qdrant_kwargs["retrieval_mode"] = RetrievalMode.HYBRID
|
||||
logger.info("✅ 稀疏向量支持已启用")
|
||||
except ImportError:
|
||||
logger.warning("⚠️ fastembed 未安装,无法启用稀疏向量,继续使用纯稠密")
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ 稀疏向量初始化失败: {e},继续使用纯稠密")
|
||||
|
||||
if self.embedder is None:
|
||||
qdrant_kwargs["embedding"] = self.embeddings
|
||||
|
||||
self.vector_store = QdrantVectorStore(**qdrant_kwargs)
|
||||
|
||||
# 根据切分类型初始化相关组件
|
||||
self._init_splitters_and_retriever()
|
||||
|
||||
@@ -14,6 +14,8 @@ tiktoken>=0.12.0
|
||||
|
||||
# Vector DB
|
||||
qdrant-client==1.17.1
|
||||
# 可选:用于稀疏向量支持(设置 enable_sparse=True 启用混合检索时,需取消下一行注释并安装)
|
||||
# fastembed>=0.3.0
|
||||
|
||||
# HTTP
|
||||
httpx==0.28.1
|
||||
|
||||
Reference in New Issue
Block a user