Files
ailine/test/test_rag_indexer_result.py
2026-04-21 16:27:05 +08:00

84 lines
2.7 KiB
Python

#!/usr/bin/env python3
"""
测试重构后的 IndexBuilder 和 RAGRetriever
"""
import asyncio
import os
import sys
# Put the project root (the parent of this file's directory) on sys.path so
# the rag_indexer package can be imported when this script is run directly.
# The path is resolved to an absolute, normalized form so the entry does not
# depend on the current working directory at import time.
project_root = os.path.realpath(
    os.path.join(os.path.dirname(__file__), os.pardir)
)
sys.path.insert(0, project_root)
from rag_indexer.index_builder import IndexBuilder
from rag_indexer.splitters import SplitterType
def _print_results(results):
    """Print the hit count, then the first 100 characters of each hit."""
    print(f"搜索结果数量: {len(results)}")
    for rank, result in enumerate(results, start=1):
        print(f"\n结果 {rank}:")
        print(f"内容: {result.page_content[:100]}...")


async def test_index_builder():
    """Exercise index building and the retrieval methods of IndexBuilder.

    Builds an index from ``data/user_docs/a.txt`` (skipped with a message
    when the file does not exist), then runs a plain search, a search with
    parent context, and the unified ``retrieve`` call with
    ``return_parent`` set to both True and False, printing the outcome of
    each step.

    Errors raised by the query steps are reported on stdout and do not stop
    the run. An error while constructing the index propagates to the caller.
    The builder is closed in every case.
    """
    print("测试索引构建功能...")

    builder = IndexBuilder(
        collection_name="test_collection",
        splitter_type=SplitterType.PARENT_CHILD,
        parent_chunk_size=1000,
        child_chunk_size=200,
    )

    # try/finally guarantees close() runs even when index construction (which
    # is not wrapped in its own except) raises.
    try:
        test_file = os.path.join(
            os.path.dirname(__file__), "..", "data", "user_docs", "a.txt"
        )
        if os.path.exists(test_file):
            print(f"正在为文件 {test_file} 构建索引...")
            processed = await builder.build_from_file(test_file)
            print(f"索引构建完成,处理了 {processed} 个文档")

            info = builder.get_collection_info()
            print(f"集合信息: {info}")
        else:
            print(f"测试文件不存在: {test_file}")

        # Plain search; called without await, unlike the two calls below.
        print("\n测试搜索功能...")
        try:
            _print_results(builder.search("吕布", k=3))
        except Exception as e:
            print(f"搜索测试失败: {e}")

        # Search that also pulls in parent-chunk context.
        print("\n测试带父块上下文的搜索...")
        try:
            _print_results(await builder.search_with_parent_context("吕布", k=3))
        except Exception as e:
            print(f"带父块上下文的搜索测试失败: {e}")

        # Unified retrieval interface, once per return_parent setting.
        print("\n测试统一检索接口...")
        try:
            results_parent = await builder.retrieve("吕布", return_parent=True)
            print(f"返回父块的结果数量: {len(results_parent)}")

            results_child = await builder.retrieve("吕布", return_parent=False)
            print(f"返回子块的结果数量: {len(results_child)}")
        except Exception as e:
            print(f"统一检索接口测试失败: {e}")
    finally:
        builder.close()

    print("\n测试完成")
# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    # asyncio.run drives the coroutine to completion on a new event loop.
    entry_coro = test_index_builder()
    asyncio.run(entry_coro)