#!/usr/bin/env python3
"""Smoke test for the refactored IndexBuilder.

Builds an index from a sample document with the parent/child splitter and
prints the resulting collection info. Run this file directly as a script.
"""

import asyncio
import os
import sys

# Add the project root to the Python path so that the ``rag_indexer`` package
# resolves when this script is run directly from its own directory.
project_root = os.path.join(os.path.dirname(__file__), "..")
sys.path.insert(0, project_root)

# These imports must come after the sys.path tweak above.
from rag_indexer.index_builder import IndexBuilder  # noqa: E402
from rag_indexer.splitters import SplitterType  # noqa: E402


async def test_index_builder() -> None:
    """Build an index from the sample document and print a summary.

    Creates an ``IndexBuilder`` for the ``rag_documents`` collection using the
    parent/child splitter. If the sample file exists, it is indexed and the
    collection info is printed; otherwise a "file not found" message is
    printed. The builder is always closed, even when indexing raises.
    """
    print("测试索引构建功能...")

    # Create the IndexBuilder instance.
    builder = IndexBuilder(
        collection_name="rag_documents",
        splitter_type=SplitterType.PARENT_CHILD,
        parent_chunk_size=1000,
        child_chunk_size=200,
    )

    try:
        # Path of the sample document used for the test.
        test_file = os.path.join(
            os.path.dirname(__file__), "..", "data", "user_docs", "doublestory.txt"
        )

        if os.path.exists(test_file):
            # Build the index.
            print(f"正在为文件 {test_file} 构建索引...")
            processed = await builder.build_from_file(test_file)
            print(f"索引构建完成,处理了 {processed} 个文档")

            # Fetch and show the collection info.
            info = builder.get_collection_info()
            print(f"集合信息: {info}")
        else:
            print(f"测试文件不存在: {test_file}")
    finally:
        # Release the builder's resources even if indexing failed part-way,
        # so an exception does not leave it open.
        builder.close()

    print("\n测试完成")


if __name__ == "__main__":
    asyncio.run(test_index_builder())