refactor: 重构RAG核心组件,简化代码结构和测试文件
Some checks failed
构建并部署 AI Agent 服务 / deploy (push) Failing after 6m53s
Some checks failed
构建并部署 AI Agent 服务 / deploy (push) Failing after 6m53s
This commit is contained in:
@@ -45,6 +45,11 @@ class IndexBuilderConfig:
|
||||
child_chunk_size: int = 200
|
||||
child_chunk_overlap: int = 20
|
||||
child_splitter_type: SplitterType = SplitterType.SEMANTIC # 子块默认语义切分
|
||||
# 子块语义切分参数
|
||||
child_buffer_size: int = 1
|
||||
child_breakpoint_threshold_type: str = "percentile"
|
||||
child_breakpoint_threshold_amount: float = 90 # 降低阈值,让切分更激进
|
||||
child_min_chunk_size: int = 50 # 降低最小块大小
|
||||
|
||||
# 检索参数
|
||||
search_k: int = 5
|
||||
@@ -86,7 +91,6 @@ class IndexBuilder:
|
||||
# 初始化向量存储(自动支持稠密+稀疏混合检索)
|
||||
self.vector_store = QdrantHybridStore(
|
||||
collection_name=config.collection_name,
|
||||
embeddings=self.embeddings,
|
||||
)
|
||||
logger.info("✅ 混合检索向量存储初始化成功(稠密+BM25稀疏)")
|
||||
|
||||
@@ -125,6 +129,10 @@ class IndexBuilder:
|
||||
self.child_splitter = get_splitter(
|
||||
SplitterType.SEMANTIC,
|
||||
embeddings=self.embeddings,
|
||||
buffer_size=cfg.child_buffer_size,
|
||||
breakpoint_threshold_type=cfg.child_breakpoint_threshold_type,
|
||||
breakpoint_threshold_amount=cfg.child_breakpoint_threshold_amount,
|
||||
min_chunk_size=cfg.child_min_chunk_size,
|
||||
**cfg.extra_splitter_kwargs
|
||||
)
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user