- 创建 download_sparse_model.py 脚本用于下载稀疏模型到本地
- 添加 SPARSE_MODEL_PATH 和 SPARSE_MODEL_NAME 配置
- 修改 retriever.py 和 index_builder.py 使用 cache_dir
- 更新 .gitignore 排除 models/ 目录
- 更新 Dockerfile 在构建时下载稀疏模型
This commit is contained in:
@@ -41,6 +41,15 @@ try:
|
||||
except ImportError:
|
||||
HAS_MODEL_SERVICES = False
|
||||
|
||||
# 尝试导入稀疏模型配置(如果可用)
|
||||
try:
|
||||
from app.config import SPARSE_MODEL_PATH, SPARSE_MODEL_NAME
|
||||
HAS_SPARSE_CONFIG = True
|
||||
except ImportError:
|
||||
HAS_SPARSE_CONFIG = False
|
||||
SPARSE_MODEL_PATH = "./models/sparse"
|
||||
SPARSE_MODEL_NAME = "Qdrant/bm25"
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------- 配置数据类 ----------
|
||||
@@ -118,10 +127,13 @@ class IndexBuilder:
|
||||
self.embedder = LlamaCppEmbedder()
|
||||
self.embeddings = self.embedder.as_langchain_embeddings()
|
||||
|
||||
# 初始化稀疏嵌入
|
||||
# 初始化稀疏嵌入(使用本地缓存目录)
|
||||
from langchain_qdrant import FastEmbedSparse, RetrievalMode
|
||||
self.sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
|
||||
logger.info("✅ FastEmbedSparse 初始化成功")
|
||||
self.sparse_embeddings = FastEmbedSparse(
|
||||
model_name=SPARSE_MODEL_NAME,
|
||||
cache_dir=SPARSE_MODEL_PATH
|
||||
)
|
||||
logger.info(f"✅ FastEmbedSparse 初始化成功 (cache_dir={SPARSE_MODEL_PATH})")
|
||||
|
||||
# 初始化向量存储(混合检索模式)
|
||||
self.vector_store = QdrantVectorStore(
|
||||
|
||||
Reference in New Issue
Block a user