#!/usr/bin/env python3
"""
Simple RAG retrieval smoke test.

Exercises the retrieval helpers provided by app/rag/retriever, plus a direct
similarity search against QdrantHybridStore, and prints what each returns.
"""

import asyncio

from backend.app.rag.retriever import (
    create_parent_hybrid_retriever,
    create_hybrid_retriever
)
from backend.rag_core import QdrantHybridStore


# Shared list of queries that every test below runs.
TEST_QUERIES = [
    "黄双银",
]


def _preview(text: str, limit: int) -> str:
    """Return the first *limit* characters of *text*, stripped, plus "..." if truncated."""
    snippet = text[:limit].strip()
    # The ellipsis is decided on the raw length, not the stripped snippet.
    if len(text) > limit:
        snippet += "..."
    return snippet


async def test_simple_vector_store_search() -> None:
    """Run each query through QdrantHybridStore.asimilarity_search and print the hits.

    For every document returned, prints its ``source`` metadata (or
    ``unknown``) and a preview of at most 120 characters of its content.
    """
    print("="*80)
    print("测试 1: QdrantHybridStore.asimilarity_search")
    print("="*80)

    vs = QdrantHybridStore(collection_name="rag_documents")

    try:
        for query in TEST_QUERIES:
            print(f"\n查询: {query}")
            print("-" * 60)

            docs = await vs.asimilarity_search(query, k=10)

            if not docs:
                print("✗ 未找到结果")
                continue

            print(f"✓ 找到 {len(docs)} 个文档")
            for i, doc in enumerate(docs, 1):
                print(f"\n {i}. 来源: {doc.metadata.get('source', 'unknown')}")
                print(f" 内容: {_preview(doc.page_content, 120)}")
    finally:
        # Always call close_async_client(), even when a search raises, so a
        # failing query does not skip the store's shutdown step.
        await vs.close_async_client()

    print("\n" + "="*80)


async def test_hybrid_retriever() -> None:
    """Run each query through the retriever built by create_hybrid_retriever.

    For every document returned, prints its ``parent_id`` metadata (or
    ``none``) and a preview of at most 100 characters of its content.
    """
    print("\n" + "="*80)
    print("测试 2: HybridRetriever (子文档混合检索)")
    print("="*80)

    retriever = create_hybrid_retriever(
        collection_name="rag_documents",
        search_k=10
    )

    for query in TEST_QUERIES:
        print(f"\n查询: {query}")
        print("-" * 60)

        docs = await retriever.ainvoke(query)

        if not docs:
            print("✗ 未找到结果")
            continue

        print(f"✓ 找到 {len(docs)} 个子文档")
        for i, doc in enumerate(docs, 1):
            print(f"\n {i}. parent_id: {doc.metadata.get('parent_id', 'none')}")
            print(f" 内容: {_preview(doc.page_content, 100)}")

    print("\n" + "="*80)


async def test_parent_hybrid_retriever() -> None:
    """Run each query through the retriever built by create_parent_hybrid_retriever.

    For every document returned, prints its ``source`` metadata (or
    ``unknown``) and a preview of at most 150 characters of its content.
    """
    print("\n" + "="*80)
    print("测试 3: ParentHybridRetriever (父子文档混合检索)")
    print("="*80)

    retriever = create_parent_hybrid_retriever(
        collection_name="rag_documents",
        search_k=10
    )

    for query in TEST_QUERIES:
        print(f"\n查询: {query}")
        print("-" * 60)

        docs = await retriever.ainvoke(query)

        if not docs:
            print("✗ 未找到结果")
            continue

        print(f"✓ 找到 {len(docs)} 个父文档")
        for i, doc in enumerate(docs, 1):
            print(f"\n {i}. 来源: {doc.metadata.get('source', 'unknown')}")
            print(f" 内容:\n {_preview(doc.page_content, 150)}")

    print("\n" + "="*80)


async def main() -> None:
    """Run the three retrieval tests in sequence and print a completion message."""
    print("\n" + "="*80)
    print("RAG 检索功能测试")
    print("="*80)

    # Test 1: query the vector store directly.
    await test_simple_vector_store_search()

    # Test 2: HybridRetriever.
    await test_hybrid_retriever()

    # Test 3: ParentHybridRetriever.
    await test_parent_hybrid_retriever()

    print("\n🎉 所有测试完成!")


if __name__ == "__main__":
    asyncio.run(main())