2026-05-04 04:28:32 +08:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
"""
|
|
|
|
|
测试 app/rag/retriever.py 里的混合检索函数
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import asyncio
|
|
|
|
|
import os
|
|
|
|
|
import sys
|
|
|
|
|
|
2026-05-04 12:55:45 +08:00
|
|
|
from backend.app.rag.retriever import create_hybrid_retriever, create_parent_hybrid_retriever
|
2026-05-04 04:28:32 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_hybrid_retriever():
    """Query the hybrid retriever once and print a preview of each result.

    Builds a retriever via ``create_hybrid_retriever`` for the
    ``rag_documents`` collection with ``search_k=3``, invokes it with a
    fixed query string, then prints the result count followed by the
    first 200 characters of every result's ``page_content``.

    The function makes no assertions; the output is meant to be inspected
    by eye.
    """
    banner = "=" * 70
    print(banner)
    print("测试 HybridRetriever...")
    print(banner)

    hits = create_hybrid_retriever(
        collection_name="rag_documents",
        search_k=3,
    ).invoke("黄双银")

    print(f"\n找到 {len(hits)} 个结果\n")
    # Number results from 1 so the printed labels are human-friendly.
    for rank, hit in enumerate(hits, start=1):
        print(f"--- 结果 {rank} ---")
        print(hit.page_content[:200])
        print()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_parent_hybrid_retriever():
    """Query the parent hybrid retriever once and print a preview of each result.

    Builds a retriever via ``create_parent_hybrid_retriever`` for the
    ``rag_documents`` collection with ``search_k=3`` and
    ``use_docstore=False``, invokes it with a fixed query string, then
    prints the result count followed by the first 300 characters of every
    result's ``page_content``.

    The function makes no assertions; the output is meant to be inspected
    by eye.
    """
    banner = "=" * 70
    # Leading newline separates this section from any earlier output.
    print("\n" + banner)
    print("测试 ParentHybridRetriever...")
    print(banner)

    parent_retriever = create_parent_hybrid_retriever(
        collection_name="rag_documents",
        search_k=3,
        use_docstore=False,
    )
    hits = parent_retriever.invoke("黄双银")

    print(f"\n找到 {len(hits)} 个结果\n")
    # Number results from 1 so the printed labels are human-friendly.
    for rank, hit in enumerate(hits, start=1):
        print(f"--- 结果 {rank} ---")
        print(hit.page_content[:300])
        print()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # When executed as a script, run both manual retriever checks in order.
    for check in (test_hybrid_retriever, test_parent_hybrid_retriever):
        check()
|