2026-04-19 22:01:55 +08:00
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
"""
|
2026-05-04 17:58:10 +08:00
|
|
|
|
Simple RAG retrieval tests.

Exercises the functionality provided by app/rag/retriever.
|
2026-04-19 22:01:55 +08:00
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
import asyncio
|
2026-05-04 17:58:10 +08:00
|
|
|
|
from backend.app.rag.retriever import (
|
|
|
|
|
|
create_parent_hybrid_retriever,
|
|
|
|
|
|
create_hybrid_retriever
|
|
|
|
|
|
)
|
|
|
|
|
|
from backend.rag_core import QdrantHybridStore
|
2026-04-19 22:01:55 +08:00
|
|
|
|
|
|
|
|
|
|
|
2026-05-04 17:58:10 +08:00
|
|
|
|
# Shared list of queries that every retrieval test below iterates over.
TEST_QUERIES = ["黄双银"]
|
2026-05-04 02:54:37 +08:00
|
|
|
|
|
|
|
|
|
|
|
2026-05-04 17:58:10 +08:00
|
|
|
|
async def test_simple_vector_store_search():
    """Query QdrantHybridStore.asimilarity_search directly and print the hits.

    Every entry of TEST_QUERIES is searched in the "rag_documents"
    collection with k=10. For each returned document the ``source``
    metadata value (or 'unknown') and a preview of at most 120 characters
    of ``page_content`` are printed.

    The store's async client is closed in a ``finally`` clause so that it
    is released even when one of the searches raises.
    """
    print("="*80)
    print("测试 1: QdrantHybridStore.asimilarity_search")
    print("="*80)

    vs = QdrantHybridStore(collection_name="rag_documents")

    try:
        for query in TEST_QUERIES:
            print(f"\n查询: {query}")
            print("-" * 60)

            docs = await vs.asimilarity_search(query, k=10)

            if docs:
                print(f"✓ 找到 {len(docs)} 个文档")
                for i, doc in enumerate(docs, 1):
                    print(f"\n {i}. 来源: {doc.metadata.get('source', 'unknown')}")
                    preview = doc.page_content[:120].strip()
                    # Mark the preview as truncated only when text was cut off.
                    if len(doc.page_content) > 120:
                        preview += "..."
                    print(f" 内容: {preview}")
            else:
                print("✗ 未找到结果")
    finally:
        # Always release the client, including when a search failed.
        await vs.close_async_client()

    print("\n" + "="*80)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def test_hybrid_retriever():
    """Run each test query through the hybrid (child-document) retriever.

    The retriever is built with create_hybrid_retriever for the
    "rag_documents" collection with search_k=10. For every hit the
    ``parent_id`` metadata value (or 'none') and a preview of at most
    100 characters of ``page_content`` are printed.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("测试 2: HybridRetriever (子文档混合检索)")
    print(rule)

    hybrid = create_hybrid_retriever(collection_name="rag_documents", search_k=10)

    for question in TEST_QUERIES:
        print(f"\n查询: {question}")
        print("-" * 60)

        hits = await hybrid.ainvoke(question)

        # Nothing came back: report it and move on to the next query.
        if not hits:
            print("✗ 未找到结果")
            continue

        print(f"✓ 找到 {len(hits)} 个子文档")
        for rank, hit in enumerate(hits, start=1):
            parent_id = hit.metadata.get('parent_id', 'none')
            print(f"\n {rank}. parent_id: {parent_id}")
            body = hit.page_content
            snippet = body[:100].strip()
            # Append an ellipsis only when the content was actually cut.
            if len(body) > 100:
                snippet = snippet + "..."
            print(f" 内容: {snippet}")

    print("\n" + rule)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def test_parent_hybrid_retriever():
    """Run each test query through the parent/child hybrid retriever.

    The retriever is built with create_parent_hybrid_retriever for the
    "rag_documents" collection with search_k=10. For every returned
    document the ``source`` metadata value (or 'unknown') and a preview of
    at most 150 characters of ``page_content`` are printed.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("测试 3: ParentHybridRetriever (父子文档混合检索)")
    print(rule)

    parent_retriever = create_parent_hybrid_retriever(
        collection_name="rag_documents",
        search_k=10,
    )

    for question in TEST_QUERIES:
        print(f"\n查询: {question}")
        print("-" * 60)

        parents = await parent_retriever.ainvoke(question)

        # Nothing came back: report it and move on to the next query.
        if not parents:
            print("✗ 未找到结果")
            continue

        print(f"✓ 找到 {len(parents)} 个父文档")
        for rank, parent in enumerate(parents, start=1):
            origin = parent.metadata.get('source', 'unknown')
            print(f"\n {rank}. 来源: {origin}")
            body = parent.page_content
            snippet = body[:150].strip()
            # Append an ellipsis only when the content was actually cut.
            if len(body) > 150:
                snippet = snippet + "..."
            print(f" 内容:\n {snippet}")

    print("\n" + rule)
|
2026-05-04 02:54:37 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def main():
    """Run all three retrieval tests one after another and report completion."""
    rule = "=" * 80
    print("\n" + rule)
    print("RAG 检索功能测试")
    print(rule)

    # Order matters for the printed report: vector store first, then the
    # hybrid retriever, then the parent/child hybrid retriever.
    checks = (
        test_simple_vector_store_search,
        test_hybrid_retriever,
        test_parent_hybrid_retriever,
    )
    for check in checks:
        await check()

    print("\n🎉 所有测试完成!")
|
2026-05-04 02:54:37 +08:00
|
|
|
|
|
2026-04-19 22:01:55 +08:00
|
|
|
|
|
|
|
|
|
|
# Script entry point: run the async main() on a fresh event loop only when
# this file is executed directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())
|