添加稀疏模型本地缓存功能
Some checks failed
构建并部署 AI Agent 服务 / deploy (push) Failing after 13m54s

- 创建 download_sparse_model.py 脚本用于下载稀疏模型到本地
- 添加 SPARSE_MODEL_PATH 和 SPARSE_MODEL_NAME 配置
- 修改 retriever.py 和 index_builder.py 使用 cache_dir
- 更新 .gitignore 排除 models/ 目录
- 更新 Dockerfile 在构建时下载稀疏模型
This commit is contained in:
2026-05-03 18:55:39 +08:00
parent 5c45806ad3
commit 2183c901b4
6 changed files with 117 additions and 6 deletions

View File

@@ -28,6 +28,7 @@ from langchain_core.retrievers import BaseRetriever
from rag_core import QDRANT_URL, QDRANT_API_KEY
from rag_core.client import create_qdrant_client as create_core_qdrant_client
from app.model_services import get_embedding_service
+from app.config import SPARSE_MODEL_PATH, SPARSE_MODEL_NAME
from app.logger import info, warning
# 模块级常量
@@ -134,9 +135,12 @@ def create_hybrid_retriever(
raise ValueError(f"Qdrant 集合 '{collection_name}' 不存在")
raise
-# 初始化稀疏嵌入
-sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
-info("✅ FastEmbedSparse 初始化成功")
+# 初始化稀疏嵌入(使用本地缓存目录)
+sparse_embeddings = FastEmbedSparse(
+model_name=SPARSE_MODEL_NAME,
+cache_dir=SPARSE_MODEL_PATH
+)
+info(f"✅ FastEmbedSparse 初始化成功 (cache_dir={SPARSE_MODEL_PATH})")
# 创建混合模式的 QdrantVectorStore
vector_store = QdrantVectorStore(