2026-04-19 22:01:55 +08:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
"""
Test the refactored IndexBuilder and RAGRetriever.
"""
|
|
|
|
|
|
|
|
|
|
import asyncio
|
|
|
|
|
import os
|
|
|
|
|
import sys
|
|
|
|
|
|
|
|
|
|
# Put the project root (the parent of this file's directory) at the front of
# the Python import path.
_this_dir = os.path.dirname(__file__)
project_root = os.path.join(_this_dir, "..")
sys.path.insert(0, project_root)
|
2026-04-19 22:01:55 +08:00
|
|
|
|
2026-04-21 16:27:05 +08:00
|
|
|
from rag_indexer.index_builder import IndexBuilder
|
|
|
|
|
from rag_indexer.splitters import SplitterType
|
2026-04-19 22:01:55 +08:00
|
|
|
|
|
|
|
|
async def test_index_builder():
    """Exercise index building against the sample document.

    Creates an ``IndexBuilder`` configured with the parent/child splitter,
    then, if ``data/user_docs/doublestory.txt`` (resolved relative to this
    file's parent directory) exists, awaits ``build_from_file`` on it and
    prints the result together with the collection info.  If the file is
    missing, only a notice is printed.

    The builder is closed in a ``finally`` clause so that it is released
    even when building the index or fetching the collection info raises;
    the exception still propagates to the caller.
    """
    print("测试索引构建功能...")

    # Create the IndexBuilder instance.
    builder = IndexBuilder(
        collection_name="rag_documents",
        splitter_type=SplitterType.PARENT_CHILD,
        parent_chunk_size=1000,
        child_chunk_size=200
    )

    try:
        # Path of the test document.
        test_file = os.path.join(os.path.dirname(__file__), "..", "data", "user_docs", "doublestory.txt")

        if os.path.exists(test_file):
            # Build the index.
            print(f"正在为文件 {test_file} 构建索引...")
            # NOTE(review): the message below treats the return value as a
            # processed-document count -- confirm against IndexBuilder.
            processed = await builder.build_from_file(test_file)
            print(f"索引构建完成,处理了 {processed} 个文档")

            # Fetch and display the collection info.
            info = builder.get_collection_info()
            print(f"集合信息: {info}")
        else:
            print(f"测试文件不存在: {test_file}")
    finally:
        # Always release the builder, including on failure.
        builder.close()

    print("\n测试完成")
|
|
|
|
|
|
|
|
|
|
# Script entry point: run the async test to completion on a new event loop.
if __name__ == "__main__":
    entry_coro = test_index_builder()
    asyncio.run(entry_coro)
|