#!/usr/bin/env python3
"""Inspect the data stored in a Qdrant collection and the sparse embedder.

Run as a script to print a handful of stored points (payload and vector
layout) followed by a sample sparse embedding.
"""

import asyncio
import os
import sys

from backend.rag_core import QdrantVectorStore
from backend.app.model_services import get_embedding_service


def check_qdrant_data(collection_name: str = "rag_documents", limit: int = 5) -> None:
    """Print the payload and vector layout of a few points in a collection.

    Args:
        collection_name: Collection to inspect. Used both to build the
            vector store and to scroll, so the two can never disagree.
        limit: Maximum number of points to fetch and print.
    """
    print("=" * 70)
    print("检查 Qdrant 中的数据结构...")
    print("=" * 70)

    embeddings = get_embedding_service()
    vs = QdrantVectorStore(collection_name=collection_name, embeddings=embeddings)
    client = vs.get_qdrant_client()

    # Fetch a few points first to look at the payload structure.
    # NOTE(review): scroll presumably pages through points in storage order
    # rather than sampling, so "random" in the message is loose -- confirm.
    print(f"\n获取 {limit} 个随机文档:")
    results = client.scroll(
        collection_name=collection_name,
        limit=limit,
        with_payload=True,
        with_vectors=True,
    )

    # The first element of the scroll result holds the fetched points.
    for i, point in enumerate(results[0], 1):
        # A point may carry no payload; treat that as empty so the key
        # listing and membership checks below do not raise.
        payload = point.payload or {}

        print(f"\n{i}. ID: {point.id}")
        print(f" Payload: {point.payload}")
        print(f" Payload 键: {list(payload.keys())}")

        if "text" in payload:
            text = payload["text"]
            print(f" Text 长度: {len(text)}")
            print(f" Text 预览: {text[:150]}...")

        if "page_content" in payload:
            print(f" page_content: {payload['page_content'][:150]}...")

        # Look at the vector; a dict here means several named vectors.
        if point.vector:
            print(f" 向量存在: {type(point.vector)}")
            if isinstance(point.vector, dict):
                print(f" 向量键: {list(point.vector.keys())}")


def check_sparse_embedder() -> None:
    """Print the sparse embedder, its vocabulary size and a sample embedding."""
    from backend.rag_core import get_sparse_embedder

    print("\n" + "=" * 70)
    print("检查稀疏嵌入器...")
    print("=" * 70)

    sparse_embedder = get_sparse_embedder()
    print(f"\n稀疏嵌入器: {sparse_embedder}")
    print(f"Vocabulary 大小: {len(sparse_embedder.model.vocab)}")
    print("示例查询: '冬天 食物'")

    # Try it with a Chinese query.
    sparse_vec = sparse_embedder.embed_query("冬天 食物")
    print("\n生成的稀疏向量:")
    print(f" 索引数量: {len(sparse_vec['indices'])}")
    print(f" 索引: {sparse_vec['indices'][:10]}")
    print(f" 值: {sparse_vec['values'][:10]}")


if __name__ == "__main__":
    check_qdrant_data()
    check_sparse_embedder()