This commit is contained in:
80
tools/test/check_qdrant.py
Normal file
80
tools/test/check_qdrant.py
Normal file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
检查 Qdrant 集合里的数据结构
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Add the project root and its backend/ directory to the Python import path.
# The root is resolved to an absolute, normalized path so the sys.path entries
# do not contain ".." segments and do not depend on the working directory.
project_root = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "..", "..")
)
# backend/ is inserted first so that the project root ends up ahead of it.
sys.path.insert(0, os.path.join(project_root, "backend"))
sys.path.insert(0, project_root)
|
||||
|
||||
from rag_core import QdrantVectorStore
|
||||
from app.model_services import get_embedding_service
|
||||
|
||||
|
||||
def check_qdrant_data():
    """Print the payload and vector layout of a few points stored in Qdrant.

    Builds a ``QdrantVectorStore`` for the ``rag_documents`` collection using
    the embedding service, scrolls up to five points with payloads and
    vectors, and prints for each point: its ID, the raw payload, the payload
    keys, the length and a 150-character preview of ``text`` when present, a
    150-character preview of ``page_content`` when present, and the vector
    type (plus the vector keys when the vector is a dict).

    Returns:
        None. All output is written to stdout.
    """
    collection_name = "rag_documents"
    preview_chars = 150
    banner = "=" * 70

    print(banner)
    print("检查 Qdrant 中的数据结构...")
    print(banner)

    embeddings = get_embedding_service()
    vs = QdrantVectorStore(collection_name=collection_name, embeddings=embeddings)
    client = vs.get_qdrant_client()

    # Fetch a handful of points first to look at the payload structure.
    # NOTE(review): the message says "random", but scroll presumably returns
    # the first page of points rather than a random sample - confirm.
    print("\n获取 5 个随机文档:")
    results = client.scroll(
        collection_name=collection_name,
        limit=5,
        with_payload=True,
        with_vectors=True,
    )

    # The first element of the scroll result holds the returned points.
    points = results[0]
    for i, point in enumerate(points, 1):
        # A point may carry no payload at all; fall back to an empty dict so
        # the key inspection below does not crash on None.
        payload = point.payload or {}

        print(f"\n{i}. ID: {point.id}")
        print(f" Payload: {point.payload}")
        print(f" Payload 键: {list(payload.keys())}")

        if "text" in payload:
            text = payload["text"]
            print(f" Text 长度: {len(text)}")
            print(f" Text 预览: {text[:preview_chars]}...")

        if "page_content" in payload:
            print(f" page_content: {payload['page_content'][:preview_chars]}...")

        # Inspect the vector: report its type and, for a dict, its keys.
        if point.vector:
            print(f" 向量存在: {type(point.vector)}")
            if isinstance(point.vector, dict):
                print(f" 向量键: {list(point.vector.keys())}")
|
||||
|
||||
|
||||
def check_sparse_embedder():
    """Print diagnostics for the sparse embedder using a sample Chinese query.

    Obtains the embedder from ``rag_core.get_sparse_embedder``, prints the
    embedder itself and the size of ``model.vocab``, then embeds the query
    ``"冬天 食物"`` and prints the number of indices together with the first
    ten indices and the first ten values of the resulting sparse vector.

    Returns:
        None. All output is written to stdout.
    """
    # Imported inside the function, so rag_core is only touched when this
    # check actually runs.
    from rag_core import get_sparse_embedder

    rule = "=" * 70
    print("\n" + rule)
    print("检查稀疏嵌入器...")
    print(rule)

    embedder = get_sparse_embedder()

    print(f"\n稀疏嵌入器: {embedder}")
    vocab_size = len(embedder.model.vocab)
    print(f"Vocabulary 大小: {vocab_size}")
    print("示例查询: '冬天 食物'")

    # Try the embedder with a Chinese query.
    query_vector = embedder.embed_query("冬天 食物")
    print("\n生成的稀疏向量:")

    indices = query_vector['indices']
    print(f" 索引数量: {len(indices)}")
    print(f" 索引: {indices[:10]}")

    values = query_vector['values']
    print(f" 值: {values[:10]}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run both diagnostics in order: collection contents first, then the
    # sparse embedder.
    for run_check in (check_qdrant_data, check_sparse_embedder):
        run_check()
|
||||
Reference in New Issue
Block a user