文件变更

This commit is contained in:
2026-04-20 14:05:57 +08:00
parent 3c906e91d9
commit 4e981e9dcf
28 changed files with 474 additions and 490 deletions

View File

@@ -16,6 +16,7 @@ from langchain_core.embeddings import Embeddings
from langchain_core.stores import BaseStore
from langchain_text_splitters import RecursiveCharacterTextSplitter, TextSplitter
from langchain_classic.retrievers import ParentDocumentRetriever
from qdrant_client.http.exceptions import ResponseHandlingException
from .loaders import DocumentLoader
from .splitters import SplitterType, get_splitter, SemanticChunkerAdapter
@@ -223,18 +224,26 @@ class IndexBuilder:
async def _add_batch_with_retry(self, batch: List[Document], batch_no: int) -> None:
    """Add one batch of documents, retrying on transient network errors.

    The batch is passed to ``self.retriever.aadd_documents``. If that call
    raises one of the connection-level exceptions listed in the ``except``
    clause, the vector-store client is refreshed and the call is retried
    after an exponentially growing delay (2, 4, 8, 16 seconds).

    Args:
        batch: Documents to add in a single call.
        batch_no: Batch number, used only in log messages.

    Raises:
        RemoteProtocolError, ConnectionError, OSError,
        ResponseHandlingException: re-raised unchanged when the final
            attempt also fails. Any other exception propagates immediately
            without a retry.
    """
    max_retries = 5
    base_delay = 2  # seconds; doubled after every failed attempt
    for attempt in range(max_retries):
        try:
            await self.retriever.aadd_documents(batch)  # type: ignore[union-attr]
            logger.info("批次 %d 成功添加 %d 个文档", batch_no, len(batch))
            return
        except (RemoteProtocolError, ConnectionError, OSError, ResponseHandlingException) as e:
            # Last attempt: give up and let the caller see the original error.
            if attempt == max_retries - 1:
                logger.error("批次 %d 重试 %d 次后仍然失败: %s", batch_no, max_retries, e)
                raise
            # Exponential backoff: base_delay * 2**attempt seconds.
            wait_time = base_delay * (2 ** attempt)
            error_type = type(e).__name__
            logger.warning(
                "批次 %d 遇到网络异常 [%s]%d秒后重试 (%d/%d): %s",
                batch_no, error_type, wait_time, attempt + 1, max_retries, e
            )
            # NOTE(review): refresh_client() is defined outside this view;
            # presumably it rebuilds the Qdrant client connection, as the
            # debug message below states -- confirm against vector_store.
            self.vector_store.refresh_client()
            logger.debug("批次 %d 已刷新 Qdrant 客户端连接", batch_no)
            await asyncio.sleep(wait_time)
# ---------- 信息获取方法 ----------
def get_collection_info(self) -> Any: