修改配置

This commit is contained in:
2026-04-21 18:41:14 +08:00
parent 08826c70a3
commit e2eaac9498
12 changed files with 393 additions and 148 deletions

View File

@@ -11,6 +11,9 @@ from langchain_core.documents import Document
from unstructured.documents.elements import Element
from unstructured.partition.auto import partition
# Import configuration values via a package-relative import
from .config import RAG_OCR_LANGUAGES, RAG_DOC_LANGUAGES
logger = logging.getLogger(__name__)
# Set the environment variables once at module load time to avoid setting them repeatedly
@@ -47,8 +50,8 @@ class DocumentLoader:
"""
self.extract_images = extract_images
self.strategy = strategy
self.ocr_languages = ocr_languages or ["chi_sim", "eng"]
self.languages = languages or ["zh"]
self.ocr_languages = ocr_languages or RAG_OCR_LANGUAGES
self.languages = languages or RAG_DOC_LANGUAGES
self.include_page_breaks = include_page_breaks
self.pdf_infer_table_structure = pdf_infer_table_structure
self.partition_kwargs = partition_kwargs or {}