Files
ailine/tools/download_bm25.py
root 60afa86ded
Some checks failed
构建并部署 AI Agent 服务 / deploy (push) Has been cancelled
feat: 实现 BM25 稀疏 + 稠密向量混合检索功能
2026-05-04 02:01:22 +08:00

23 lines
727 B
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
BM25模型预下载脚本
执行后将模型缓存到 ./models/fastembed_cache 目录打包进Docker镜像
"""
import os
from fastembed.sparse.sparse_text_embedding import SparseTextEmbedding
def main(cache_dir: str = "./models/fastembed_cache") -> None:
    """Download the Qdrant BM25 sparse model into a local cache directory.

    The directory is created if it does not exist, the model is
    instantiated with that directory as its cache, and a single dummy
    embedding is computed so that the model files are fully fetched.

    Args:
        cache_dir: Directory where fastembed should cache the model files.
            A relative path is resolved against the current working
            directory, so run the script from the project root.
    """
    # Make sure the cache directory exists before fastembed writes into it.
    os.makedirs(cache_dir, exist_ok=True)

    print("正在下载BM25稀疏向量模型...")
    model = SparseTextEmbedding(
        model_name="Qdrant/bm25",
        cache_dir=cache_dir,
    )

    # Run one inference so the model files are completely downloaded.
    # The result is wrapped in list() so it is fully consumed in case
    # embed() evaluates lazily; the embeddings themselves are discarded.
    list(model.embed(["init trigger"]))

    print(f"✅ BM25模型已成功缓存到: {cache_dir}")
    print("请将该目录提交到项目仓库打包进Docker镜像")


if __name__ == "__main__":
    main()