refactor: 重构 RAG 核心组件,简化代码结构和测试文件
Some checks failed
构建并部署 AI Agent 服务 / deploy (push) Failing after 6m53s

This commit is contained in:
2026-05-04 17:58:10 +08:00
parent a07e398739
commit 9841f47432
31 changed files with 578 additions and 1496 deletions

View File

@@ -45,6 +45,11 @@ class IndexBuilderConfig:
child_chunk_size: int = 200
child_chunk_overlap: int = 20
child_splitter_type: SplitterType = SplitterType.SEMANTIC # 子块默认语义切分
# 子块语义切分参数
child_buffer_size: int = 1
child_breakpoint_threshold_type: str = "percentile"
child_breakpoint_threshold_amount: float = 90 # 降低阈值,让切分更激进
child_min_chunk_size: int = 50 # 降低最小块大小
# 检索参数
search_k: int = 5
@@ -86,7 +91,6 @@ class IndexBuilder:
# 初始化向量存储(自动支持稠密+稀疏混合检索)
self.vector_store = QdrantHybridStore(
collection_name=config.collection_name,
embeddings=self.embeddings,
)
logger.info("✅ 混合检索向量存储初始化成功(稠密+BM25稀疏")
@@ -125,6 +129,10 @@ class IndexBuilder:
self.child_splitter = get_splitter(
SplitterType.SEMANTIC,
embeddings=self.embeddings,
buffer_size=cfg.child_buffer_size,
breakpoint_threshold_type=cfg.child_breakpoint_threshold_type,
breakpoint_threshold_amount=cfg.child_breakpoint_threshold_amount,
min_chunk_size=cfg.child_min_chunk_size,
**cfg.extra_splitter_kwargs
)
else: