#!/usr/bin/env python3
"""Smoke test for the refactored IndexBuilder.

Builds an index from a sample corpus file (when present) and then exercises
the builder's plain search, parent-context search, and unified retrieval
entry points, printing the outcome of each step.

The original header also mentions RAGRetriever, but this script only
exercises IndexBuilder.
"""

import asyncio
import os
import sys

# Add the project root directory to the Python path.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

# NOTE(review): these are relative imports, so the module has to be executed
# as part of its package (e.g. via ``python -m``); running the file directly
# raises "attempted relative import with no known parent package" and the
# sys.path tweak above does not change that. Confirm the intended invocation.
from ..index_builder import IndexBuilder
from ..splitters import SplitterType


def _print_results(results):
    """Print the result count and the first 100 characters of each result."""
    print(f"搜索结果数量: {len(results)}")
    for i, result in enumerate(results):
        print(f"\n结果 {i+1}:")
        print(f"内容: {result.page_content[:100]}...")


async def test_index_builder() -> None:
    """Exercise index building, search, and retrieval on an IndexBuilder.

    Each search/retrieval step is best-effort: a failure is reported on
    stdout and the next step still runs. Errors raised while building the
    index propagate to the caller, but the builder is always closed first.
    """
    print("测试索引构建功能...")

    # Create the IndexBuilder instance under test.
    builder = IndexBuilder(
        collection_name="test_collection",
        splitter_type=SplitterType.PARENT_CHILD,
        parent_chunk_size=1000,
        child_chunk_size=200,
    )

    # try/finally guarantees close() runs even when index building raises;
    # previously an exception here skipped the cleanup entirely.
    try:
        # Path of the sample document used for indexing.
        test_file = os.path.join(
            os.path.dirname(__file__), "..", "data", "corpus", "三国演义.txt"
        )

        if os.path.exists(test_file):
            # Build the index.
            print(f"正在为文件 {test_file} 构建索引...")
            processed = await builder.build_from_file(test_file)
            print(f"索引构建完成,处理了 {processed} 个文档")

            # Fetch collection information.
            info = builder.get_collection_info()
            print(f"集合信息: {info}")
        else:
            print(f"测试文件不存在: {test_file}")

        # Plain search.
        print("\n测试搜索功能...")
        try:
            _print_results(builder.search("吕布", k=3))
        except Exception as e:
            print(f"搜索测试失败: {e}")

        # Search with parent-chunk context.
        print("\n测试带父块上下文的搜索...")
        try:
            _print_results(await builder.search_with_parent_context("吕布", k=3))
        except Exception as e:
            print(f"带父块上下文的搜索测试失败: {e}")

        # Unified retrieval interface.
        print("\n测试统一检索接口...")
        try:
            # Return parent chunks.
            results_parent = await builder.retrieve("吕布", return_parent=True)
            print(f"返回父块的结果数量: {len(results_parent)}")

            # Return child chunks.
            results_child = await builder.retrieve("吕布", return_parent=False)
            print(f"返回子块的结果数量: {len(results_child)}")
        except Exception as e:
            print(f"统一检索接口测试失败: {e}")
    finally:
        # Release the builder's resources.
        builder.close()

    print("\n测试完成")


if __name__ == "__main__":
    asyncio.run(test_index_builder())