Files
ailine/tools/test/test_rag_indexer_result.py
root 9841f47432
Some checks failed
构建并部署 AI Agent 服务 / deploy (push) Failing after 6m53s
refactor: 重构RAG核心组件,简化代码结构和测试文件
2026-05-04 17:58:10 +08:00

132 lines
3.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
简单的 RAG 检索测试
使用 app/rag/retriever 提供的功能
"""
import asyncio
from backend.app.rag.retriever import (
create_parent_hybrid_retriever,
create_hybrid_retriever
)
from backend.rag_core import QdrantHybridStore
# Query strings shared by every retrieval check in this script.
TEST_QUERIES = ["黄双银"]
async def test_simple_vector_store_search():
    """Query ``QdrantHybridStore`` directly and print every hit.

    Each entry of ``TEST_QUERIES`` is passed to ``asimilarity_search`` with
    ``k=10`` against the ``rag_documents`` collection. For every returned
    document the ``source`` metadata value (``'unknown'`` when absent) and a
    preview of at most 120 characters are printed.

    The store's async client is closed in a ``finally`` clause, so it is
    released even when a search raises; the exception still propagates to
    the caller.
    """
    print("=" * 80)
    print("测试 1: QdrantHybridStore.asimilarity_search")
    print("=" * 80)

    vs = QdrantHybridStore(collection_name="rag_documents")
    try:
        for query in TEST_QUERIES:
            print(f"\n查询: {query}")
            print("-" * 60)

            docs = await vs.asimilarity_search(query, k=10)
            if docs:
                print(f"✓ 找到 {len(docs)} 个文档")
                for i, doc in enumerate(docs, 1):
                    print(f"\n {i}. 来源: {doc.metadata.get('source', 'unknown')}")
                    preview = doc.page_content[:120].strip()
                    # Mark the preview as truncated only when text was cut off.
                    if len(doc.page_content) > 120:
                        preview += "..."
                    print(f" 内容: {preview}")
            else:
                print("✗ 未找到结果")
    finally:
        # Always release the client, including when a query fails midway.
        await vs.close_async_client()

    print("\n" + "=" * 80)
async def test_hybrid_retriever():
    """Run every test query through the hybrid retriever and print the hits.

    The retriever comes from ``create_hybrid_retriever`` for the
    ``rag_documents`` collection with ``search_k=10``. For each returned
    document the ``parent_id`` metadata value (``'none'`` when absent) and a
    preview of at most 100 characters are printed.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 60
    preview_limit = 100

    print("\n" + heavy_rule)
    print("测试 2: HybridRetriever (子文档混合检索)")
    print(heavy_rule)

    retriever = create_hybrid_retriever(
        collection_name="rag_documents",
        search_k=10,
    )

    for query in TEST_QUERIES:
        print(f"\n查询: {query}")
        print(light_rule)

        hits = await retriever.ainvoke(query)
        if not hits:
            print("✗ 未找到结果")
            continue

        print(f"✓ 找到 {len(hits)} 个子文档")
        for rank, hit in enumerate(hits, start=1):
            print(f"\n {rank}. parent_id: {hit.metadata.get('parent_id', 'none')}")
            preview = hit.page_content[:preview_limit].strip()
            # Append an ellipsis only when the content was actually cut.
            if len(hit.page_content) > preview_limit:
                preview = preview + "..."
            print(f" 内容: {preview}")

    print("\n" + heavy_rule)
async def test_parent_hybrid_retriever():
    """Run every test query through the parent hybrid retriever and print the hits.

    The retriever comes from ``create_parent_hybrid_retriever`` for the
    ``rag_documents`` collection with ``search_k=10``. For each returned
    document the ``source`` metadata value (``'unknown'`` when absent) and a
    preview of at most 150 characters are printed.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 60
    preview_limit = 150

    print("\n" + heavy_rule)
    print("测试 3: ParentHybridRetriever (父子文档混合检索)")
    print(heavy_rule)

    retriever = create_parent_hybrid_retriever(
        collection_name="rag_documents",
        search_k=10,
    )

    for query in TEST_QUERIES:
        print(f"\n查询: {query}")
        print(light_rule)

        parents = await retriever.ainvoke(query)
        if not parents:
            print("✗ 未找到结果")
            continue

        print(f"✓ 找到 {len(parents)} 个父文档")
        for rank, parent in enumerate(parents, start=1):
            print(f"\n {rank}. 来源: {parent.metadata.get('source', 'unknown')}")
            preview = parent.page_content[:preview_limit].strip()
            # Append an ellipsis only when the content was actually cut.
            if len(parent.page_content) > preview_limit:
                preview = preview + "..."
            print(f" 内容:\n {preview}")

    print("\n" + heavy_rule)
async def main():
    """Print a banner, await the three retrieval checks in order, then print a completion line."""
    heavy_rule = "=" * 80

    print("\n" + heavy_rule)
    print("RAG 检索功能测试")
    print(heavy_rule)

    # Order matters for the printed output: vector store first, then the
    # hybrid retriever, then the parent hybrid retriever.
    checks = (
        test_simple_vector_store_search,
        test_hybrid_retriever,
        test_parent_hybrid_retriever,
    )
    for check in checks:
        await check()

    print("\n🎉 所有测试完成!")


# Script entry point: run all checks on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())