修改配置:DocumentLoader 的 OCR 语言与文档语言默认值改为从 config 模块读取(RAG_OCR_LANGUAGES、RAG_DOC_LANGUAGES),不再硬编码
This commit is contained in:
@@ -11,6 +11,9 @@ from langchain_core.documents import Document
|
||||
from unstructured.documents.elements import Element
|
||||
from unstructured.partition.auto import partition
|
||||
|
||||
+# 相对导入配置
|
||||
+from .config import RAG_OCR_LANGUAGES, RAG_DOC_LANGUAGES
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 模块加载时设置一次环境变量,避免重复设置
|
||||
@@ -47,8 +50,8 @@ class DocumentLoader:
|
||||
"""
|
||||
self.extract_images = extract_images
|
||||
self.strategy = strategy
|
||||
-self.ocr_languages = ocr_languages or ["chi_sim", "eng"]
|
||||
-self.languages = languages or ["zh"]
|
||||
+self.ocr_languages = ocr_languages or RAG_OCR_LANGUAGES
|
||||
+self.languages = languages or RAG_DOC_LANGUAGES
|
||||
self.include_page_breaks = include_page_breaks
|
||||
self.pdf_infer_table_structure = pdf_infer_table_structure
|
||||
self.partition_kwargs = partition_kwargs or {}
|
||||
|
||||
Reference in New Issue
Block a user