41 lines
1.0 KiB
Python
41 lines
1.0 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
|
|||
|
|
Simple test script: tests content that actually exists in the documents.
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import asyncio
|
|||
|
|
import os
|
|||
|
|
import sys
|
|||
|
|
|
|||
|
|
# The repository root is two directories above this script; its "backend"
# folder is put first on sys.path, presumably so the project-local imports
# that follow can be resolved when the script is run directly.
project_root = os.path.join(os.path.dirname(__file__), "..", "..")
_backend_dir = os.path.join(project_root, "backend")
sys.path.insert(0, _backend_dir)
|
|||
|
|
|
|||
|
|
from qdrant_client import models
|
|||
|
|
from rag_core import QdrantVectorStore, get_sparse_embedder
|
|||
|
|
from app.model_services import get_embedding_service
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_dense_retrieval():
    """Run one dense similarity search and print the returned documents.

    Builds a ``QdrantVectorStore`` on the ``rag_documents`` collection with
    the embedding service, searches it for a fixed query with ``k=3``, and
    prints the first 200 characters of each result's ``page_content``.
    """
    banner = "=" * 70
    print(banner)
    print("测试稠密检索...")
    print(banner)

    store = QdrantVectorStore(
        collection_name="rag_documents",
        embeddings=get_embedding_service(),
    )

    # Query with a name that actually appears in the documents.
    query = "黄双银"
    print(f"\n查询: {query}")

    hits = store.similarity_search(query, k=3)
    print(f"\n找到 {len(hits)} 个结果\n")

    # Results are numbered from 1 in the printed output.
    for rank, hit in enumerate(hits, start=1):
        print(f"--- 结果 {rank} ---")
        print(hit.page_content[:200])
        print()
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Script entry point: run the retrieval check only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    test_dense_retrieval()
|