2026-04-21 11:02:16 +08:00
|
|
|
|
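"""
RAG Indexer configuration module.

Centralizes every environment-variable setting in one place instead of
scattering them across individual files.
Settings are read directly from environment variables. Apart from the
OCR/document language lists (and DOCSTORE_URI, which falls back to DB_URI)
there are no defaults, to avoid configuration confusion.
Settings that require type conversion are converted here.
"""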
|
|
|
|
|
|
|
|
|
|
|
|
import logging
import os
from urllib.parse import quote

from dotenv import load_dotenv
|
|
|
|
|
|
load_dotenv()
|
2026-04-21 18:41:14 +08:00
|
|
|
|
|
|
|
|
|
|
# ========== 辅助函数:类型转换 ==========
|
|
|
|
|
|
def _get_str(key: str) -> str | None:
|
|
|
|
|
|
"""获取字符串配置"""
|
|
|
|
|
|
return os.getenv(key)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_int(key: str) -> int | None:
|
|
|
|
|
|
"""获取整数配置,自动转换"""
|
|
|
|
|
|
value = os.getenv(key)
|
|
|
|
|
|
if value is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
return int(value)
|
|
|
|
|
|
except (ValueError, TypeError):
|
|
|
|
|
|
pass
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_list_str(key: str, default: list[str] | None = None) -> list[str]:
|
|
|
|
|
|
"""获取字符串列表配置,从逗号分隔的字符串解析"""
|
|
|
|
|
|
value = os.getenv(key)
|
|
|
|
|
|
if value is not None:
|
|
|
|
|
|
return [item.strip() for item in value.split(",") if item.strip()]
|
|
|
|
|
|
return default or []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ========== Vector database settings (URL + API key pair) ==========
QDRANT_URL = _get_str("QDRANT_URL")
QDRANT_API_KEY = _get_str("QDRANT_API_KEY")


# ========== Embedding service settings (URL + API key pair) ==========
LLAMACPP_EMBEDDING_URL = _get_str("LLAMACPP_EMBEDDING_URL")
LLAMACPP_API_KEY = _get_str("LLAMACPP_API_KEY")


# ========== Document store settings (discrete parts + full URI) ==========
# Discrete connection settings. They are only used to assemble DB_URI when
# DB_URI itself is not provided through the environment.
DB_HOST = _get_str("DB_HOST")
DB_PORT = _get_int("DB_PORT")
DB_USER = _get_str("DB_USER")
DB_PASSWORD = _get_str("DB_PASSWORD")
DB_NAME = _get_str("DB_NAME")

# Full connection string; an explicit DB_URI from the environment wins.
DB_URI = _get_str("DB_URI")
if not DB_URI and all([DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME]):
    # Percent-encode user, password and database name so that reserved
    # characters (e.g. "@", ":", "/") in a credential cannot corrupt the URI.
    DB_URI = (
        f"postgresql://{quote(DB_USER, safe='')}:{quote(DB_PASSWORD, safe='')}"
        f"@{DB_HOST}:{DB_PORT}/{quote(DB_NAME, safe='')}?sslmode=disable"
    )

# Document store URI; falls back to DB_URI when DOCSTORE_URI is unset or empty.
DOCSTORE_URI = _get_str("DOCSTORE_URI") or DB_URI


# ========== Document loader settings (unstructured library) ==========
# OCR language list (comma-separated, e.g. "chi_sim,eng").
RAG_OCR_LANGUAGES = _get_list_str("RAG_OCR_LANGUAGES", ["chi_sim", "eng"])

# Primary document language list (comma-separated, e.g. "zh").
RAG_DOC_LANGUAGES = _get_list_str("RAG_DOC_LANGUAGES", ["zh"])


# ========== Indexer-specific settings ==========
# Index storage path (None when INDEX_STORAGE_PATH is not set).
INDEX_STORAGE_PATH = _get_str("INDEX_STORAGE_PATH")
|