Files
ailine/tools/test/test_rag_indexer_result.py

132 lines
3.6 KiB
Python
Raw Permalink Normal View History

2026-04-19 22:01:55 +08:00
#!/usr/bin/env python3
"""
Simple RAG retrieval test.

Uses the functionality provided by app/rag/retriever.
"""
import asyncio
from backend.app.rag.retriever import (
create_parent_hybrid_retriever,
create_hybrid_retriever
)
from backend.rag_core import QdrantHybridStore
2026-04-19 22:01:55 +08:00
# Shared list of test queries; every retrieval test in this file iterates it.
TEST_QUERIES = [
    "黄双银",
]
async def test_simple_vector_store_search():
    """Query ``QdrantHybridStore.asimilarity_search`` directly and print the hits.

    For every entry in ``TEST_QUERIES`` a search with ``k=10`` is issued
    against the ``rag_documents`` collection. Each returned document is
    printed with its ``source`` metadata (``'unknown'`` when absent) and a
    preview made of the first 120 characters of ``page_content``, with
    ``...`` appended when the content is longer than that.

    The store's async client is closed in a ``finally`` clause so it is
    released even when one of the searches raises.
    """
    print("=" * 80)
    print("测试 1: QdrantHybridStore.asimilarity_search")
    print("=" * 80)

    vs = QdrantHybridStore(collection_name="rag_documents")
    try:
        for query in TEST_QUERIES:
            print(f"\n查询: {query}")
            print("-" * 60)

            docs = await vs.asimilarity_search(query, k=10)
            if not docs:
                print("✗ 未找到结果")
                continue

            print(f"✓ 找到 {len(docs)} 个文档")
            for i, doc in enumerate(docs, 1):
                print(f"\n {i}. 来源: {doc.metadata.get('source', 'unknown')}")
                preview = doc.page_content[:120].strip()
                if len(doc.page_content) > 120:
                    # Signal that the preview was truncated.
                    preview += "..."
                print(f" 内容: {preview}")
    finally:
        # Always release the client, including on a failed search.
        await vs.close_async_client()

    print("\n" + "=" * 80)
async def test_hybrid_retriever():
    """Exercise the hybrid retriever (child-document retrieval) and print the hits.

    A retriever is built with ``create_hybrid_retriever`` for the
    ``rag_documents`` collection with ``search_k=10``; each entry of
    ``TEST_QUERIES`` is passed to ``ainvoke``. Every returned document is
    printed with its ``parent_id`` metadata (``'none'`` when absent) and a
    preview of at most 100 characters of its content.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("测试 2: HybridRetriever (子文档混合检索)")
    print(rule)

    retriever = create_hybrid_retriever(
        collection_name="rag_documents",
        search_k=10,
    )

    for query in TEST_QUERIES:
        print(f"\n查询: {query}")
        print("-" * 60)

        hits = await retriever.ainvoke(query)
        if not hits:
            print("✗ 未找到结果")
            continue

        print(f"✓ 找到 {len(hits)} 个子文档")
        for rank, hit in enumerate(hits, start=1):
            print(f"\n {rank}. parent_id: {hit.metadata.get('parent_id', 'none')}")
            # Append an ellipsis only when the content was actually cut.
            ellipsis = "..." if len(hit.page_content) > 100 else ""
            snippet = hit.page_content[:100].strip() + ellipsis
            print(f" 内容: {snippet}")

    print("\n" + rule)
async def test_parent_hybrid_retriever():
    """Exercise the parent hybrid retriever (parent/child retrieval) and print the hits.

    A retriever is built with ``create_parent_hybrid_retriever`` for the
    ``rag_documents`` collection with ``search_k=10``; each entry of
    ``TEST_QUERIES`` is passed to ``ainvoke``. Every returned document is
    printed with its ``source`` metadata (``'unknown'`` when absent) and a
    preview of at most 150 characters of its content.
    """
    separator = "=" * 80
    print("\n" + separator)
    print("测试 3: ParentHybridRetriever (父子文档混合检索)")
    print(separator)

    retriever = create_parent_hybrid_retriever(
        collection_name="rag_documents",
        search_k=10,
    )

    for query in TEST_QUERIES:
        print(f"\n查询: {query}")
        print("-" * 60)

        parents = await retriever.ainvoke(query)
        if not parents:
            print("✗ 未找到结果")
            continue

        print(f"✓ 找到 {len(parents)} 个父文档")
        for position, parent in enumerate(parents, start=1):
            origin = parent.metadata.get('source', 'unknown')
            print(f"\n {position}. 来源: {origin}")
            excerpt = parent.page_content[:150].strip()
            if len(parent.page_content) > 150:
                # Mark the excerpt as truncated.
                excerpt = excerpt + "..."
            print(f" 内容:\n {excerpt}")

    print("\n" + separator)
async def main():
    """Run the three retrieval tests one after another and print a summary banner."""
    banner = "=" * 80
    print("\n" + banner)
    print("RAG 检索功能测试")
    print(banner)

    # Order matters for the printed output: vector store first, then the
    # hybrid retriever, then the parent hybrid retriever.
    scenarios = (
        test_simple_vector_store_search,
        test_hybrid_retriever,
        test_parent_hybrid_retriever,
    )
    for scenario in scenarios:
        await scenario()

    print("\n🎉 所有测试完成!")
2026-04-19 22:01:55 +08:00
# Script entry point: run the async test suite only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    asyncio.run(main())