From a6813a5ab070af1b90c1ac42f3fe1f1e4b98502f Mon Sep 17 00:00:00 2001 From: root <953994191@qq.com> Date: Wed, 6 May 2026 17:08:47 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96:=20=E9=87=8D=E6=8E=92?= =?UTF-8?q?=E5=89=8D=E5=A2=9E=E5=8A=A0=E5=90=91=E9=87=8F=E5=88=9D=E7=AD=9B?= =?UTF-8?q?=EF=BC=8C=E5=8F=AA=E8=AE=A9=E5=89=8D20=E4=B8=AA=E8=BF=9B?= =?UTF-8?q?=E5=85=A5=E9=87=8D=E6=8E=92?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/rag/pipeline.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/backend/app/rag/pipeline.py b/backend/app/rag/pipeline.py index 2d89801..b6f8727 100644 --- a/backend/app/rag/pipeline.py +++ b/backend/app/rag/pipeline.py @@ -70,6 +70,13 @@ class RAGPipeline: content_len = len(doc.page_content) info(f"[Pipeline] 子文档[{i}] 长度={content_len}字符") + # Step 1.5: 向量初筛(进入重排前先过滤) + vector_top_n = 20 + info(f"[Pipeline] Step 1.5: 向量初筛: 取前 {vector_top_n} 个(当前 {len(child_docs)} 个)") + if len(child_docs) > vector_top_n: + child_docs = child_docs[:vector_top_n] + info(f"[Pipeline] Step 1.5 完成: 向量初筛后 {len(child_docs)} 个") + # Step 2: 重排 info(f"[Pipeline] Step 2: 开始重排") if self.reranker: