diff --git a/backend/app/rag/pipeline.py b/backend/app/rag/pipeline.py index 2d89801..b6f8727 100644 --- a/backend/app/rag/pipeline.py +++ b/backend/app/rag/pipeline.py @@ -70,6 +70,13 @@ class RAGPipeline: content_len = len(doc.page_content) info(f"[Pipeline] 子文档[{i}] 长度={content_len}字符") + # Step 1.5: 向量初筛(进入重排前先过滤) + vector_top_n = 20 + info(f"[Pipeline] Step 1.5: 向量初筛: 取前 {vector_top_n} 个(当前 {len(child_docs)} 个)") + if len(child_docs) > vector_top_n: + child_docs = child_docs[:vector_top_n] + info(f"[Pipeline] Step 1.5 完成: 向量初筛后 {len(child_docs)} 个") + # Step 2: 重排 info(f"[Pipeline] Step 2: 开始重排") if self.reranker: