132 lines
3.6 KiB
Python
132 lines
3.6 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
简单的 RAG 检索测试
|
||
使用 app/rag/retriever 提供的功能
|
||
"""
|
||
|
||
import asyncio
|
||
from backend.app.rag.retriever import (
|
||
create_parent_hybrid_retriever,
|
||
create_hybrid_retriever
|
||
)
|
||
from backend.rag_core import QdrantHybridStore
|
||
|
||
|
||
# Shared list of queries; every retrieval test in this script iterates over it.
TEST_QUERIES = ["黄双银"]
|
||
|
||
|
||
async def test_simple_vector_store_search():
    """Test: call QdrantHybridStore.asimilarity_search directly.

    Creates a ``QdrantHybridStore`` for the ``rag_documents`` collection,
    runs every entry of ``TEST_QUERIES`` through ``asimilarity_search`` with
    ``k=10`` and prints, for each returned document, its ``source`` metadata
    value and a preview of the first 120 characters of its content.

    The store's async client is closed in a ``finally`` clause so that it is
    released even when one of the searches raises.
    """
    print("="*80)
    print("测试 1: QdrantHybridStore.asimilarity_search")
    print("="*80)

    vs = QdrantHybridStore(collection_name="rag_documents")

    try:
        for query in TEST_QUERIES:
            print(f"\n查询: {query}")
            print("-" * 60)

            docs = await vs.asimilarity_search(query, k=10)

            if docs:
                print(f"✓ 找到 {len(docs)} 个文档")
                for i, doc in enumerate(docs, 1):
                    print(f"\n {i}. 来源: {doc.metadata.get('source', 'unknown')}")
                    preview = doc.page_content[:120].strip()
                    # Mark the preview as truncated only when content was cut.
                    if len(doc.page_content) > 120:
                        preview += "..."
                    print(f" 内容: {preview}")
            else:
                print("✗ 未找到结果")
    finally:
        # Always release the client, including when a search fails mid-loop.
        await vs.close_async_client()

    print("\n" + "="*80)
|
||
|
||
|
||
async def test_hybrid_retriever():
    """Test: HybridRetriever (child-document retrieval).

    Builds a retriever with ``create_hybrid_retriever`` for the
    ``rag_documents`` collection (``search_k=10``), invokes it for every
    entry of ``TEST_QUERIES`` and prints each result's ``parent_id`` metadata
    value together with a preview of the first 100 characters of its content.
    """
    rule = "=" * 80
    preview_limit = 100

    print("\n" + rule)
    print("测试 2: HybridRetriever (子文档混合检索)")
    print(rule)

    hybrid = create_hybrid_retriever(collection_name="rag_documents", search_k=10)

    for question in TEST_QUERIES:
        print(f"\n查询: {question}")
        print("-" * 60)

        hits = await hybrid.ainvoke(question)

        # Guard clause: nothing to list for this query.
        if not hits:
            print("✗ 未找到结果")
            continue

        print(f"✓ 找到 {len(hits)} 个子文档")
        for rank, hit in enumerate(hits, 1):
            print(f"\n {rank}. parent_id: {hit.metadata.get('parent_id', 'none')}")
            clipped = hit.page_content[:preview_limit].strip()
            # Append an ellipsis only when the content was actually cut.
            truncated = len(hit.page_content) > preview_limit
            preview = clipped + "..." if truncated else clipped
            print(f" 内容: {preview}")

    print("\n" + rule)
|
||
|
||
|
||
async def test_parent_hybrid_retriever():
    """Test: ParentHybridRetriever (parent/child hybrid retrieval).

    Builds a retriever with ``create_parent_hybrid_retriever`` for the
    ``rag_documents`` collection (``search_k=10``), invokes it for every
    entry of ``TEST_QUERIES`` and prints each result's ``source`` metadata
    value together with a preview of the first 150 characters of its content.
    """
    divider = "=" * 80
    max_chars = 150

    print("\n" + divider)
    print("测试 3: ParentHybridRetriever (父子文档混合检索)")
    print(divider)

    parent_retriever = create_parent_hybrid_retriever(
        collection_name="rag_documents",
        search_k=10,
    )

    for text in TEST_QUERIES:
        print(f"\n查询: {text}")
        print("-" * 60)

        parents = await parent_retriever.ainvoke(text)

        # Report the miss and move on to the next query.
        if not parents:
            print("✗ 未找到结果")
            continue

        print(f"✓ 找到 {len(parents)} 个父文档")
        for position, parent in enumerate(parents, 1):
            print(f"\n {position}. 来源: {parent.metadata.get('source', 'unknown')}")
            excerpt = parent.page_content[:max_chars].strip()
            # The ellipsis signals that the parent document is longer than shown.
            if len(parent.page_content) > max_chars:
                excerpt = excerpt + "..."
            print(f" 内容:\n {excerpt}")

    print("\n" + divider)
|
||
|
||
|
||
async def main():
    """Main test entry point: run the three retrieval tests one after another."""
    banner = "=" * 80
    print("\n" + banner)
    print("RAG 检索功能测试")
    print(banner)

    # Order matters only for the printed report; each stage is awaited in turn.
    stages = (
        test_simple_vector_store_search,  # Test 1: vector store used directly
        test_hybrid_retriever,            # Test 2: HybridRetriever
        test_parent_hybrid_retriever,     # Test 3: ParentHybridRetriever
    )
    for stage in stages:
        await stage()

    print("\n🎉 所有测试完成!")
|
||
|
||
|
||
if __name__ == "__main__":
    # Run the async test sequence only when this file is executed as a script.
    asyncio.run(main())
|