修改配置

This commit is contained in:
2026-04-21 18:41:14 +08:00
parent 08826c70a3
commit e2eaac9498
12 changed files with 393 additions and 148 deletions

View File

@@ -10,7 +10,38 @@ ZHIPUAI_API_KEY=your_zhipuai_api_key_here
DEEPSEEK_API_KEY=your_deepseek_api_key_here DEEPSEEK_API_KEY=your_deepseek_api_key_here
# llama.cpp 服务认证 Token与容器启动参数一致 # llama.cpp 服务认证 Token与容器启动参数一致
LLAMACPP_API_KEY=token-abc123 LLAMACPP_API_KEY=huang1998
# -----------------------------------------------------------------------------
# PostgreSQL 数据库配置(分离配置,易于管理)
# -----------------------------------------------------------------------------
DB_HOST=115.190.121.151
DB_PORT=5432
DB_USER=postgres
DB_PASSWORD=huang1998
DB_NAME=langgraph_db
# 完整连接字符串(也支持直接配置,优先使用分离配置)
DB_URI=postgresql://postgres:huang1998@115.190.121.151:5432/langgraph_db?sslmode=disable
# -----------------------------------------------------------------------------
# Qdrant 向量数据库配置(URL + API密钥 配对)
# -----------------------------------------------------------------------------
QDRANT_URL=http://115.190.121.151:6333
QDRANT_API_KEY=huang1998
QDRANT_COLLECTION_NAME=mem0_user_memories
# -----------------------------------------------------------------------------
# llama.cpp 服务配置(URL + API密钥 配对)
# -----------------------------------------------------------------------------
# 主 LLM 服务 (Gemma-4-E2B GGUF) - 端口 8081
VLLM_BASE_URL=http://host.docker.internal:18000/v1
# Embedding 服务 (embeddinggemma-300M GGUF) - 端口 8082
LLAMACPP_EMBEDDING_URL=http://host.docker.internal:18001/v1
# LLAMACPP_API_KEY=huang1998 (已在上面配置)
# Reranker 服务 (bge-reranker-v2-m3) - 端口 8083
LLAMACPP_RERANKER_URL=http://host.docker.internal:18002/v1
# ⭐ 日志调试配置(部署时可灵活调整) # ⭐ 日志调试配置(部署时可灵活调整)
# ============================================================================= # =============================================================================
@@ -28,53 +59,17 @@ DEBUG=false
# false: 关闭追踪,减少日志量 # false: 关闭追踪,减少日志量
ENABLE_GRAPH_TRACE=false ENABLE_GRAPH_TRACE=false
# -----------------------------------------------------------------------------
# llama.cpp 服务配置
# -----------------------------------------------------------------------------
# 主 LLM 服务 (Gemma-4-E2B GGUF) - 端口 8081
VLLM_BASE_URL=http://host.docker.internal:18000/v1
# Embedding 服务 (embeddinggemma-300M GGUF) - 端口 8082
LLAMACPP_EMBEDDING_URL=http://host.docker.internal:18001/v1
# Reranker 服务 (bge-reranker-v2-m3) - 端口 8083
LLAMACPP_RERANKER_URL=http://host.docker.internal:18002/v1
# -----------------------------------------------------------------------------
# Mem0 记忆层配置
# -----------------------------------------------------------------------------
# Qdrant 向量数据库(远程服务器上的独立容器)
QDRANT_URL=http://115.190.121.151:6333
QDRANT_COLLECTION_NAME=mem0_user_memories
# -----------------------------------------------------------------------------
# 数据库配置
# -----------------------------------------------------------------------------
# PostgreSQL 连接字符串(远程服务器上的独立容器)
DB_URI=postgresql://postgres:huang1998@115.190.121.151:5432/langgraph_db?sslmode=disable
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# 前端配置 # 前端配置
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# Docker Compose 内部网络,使用服务名 'backend' # Docker Compose 内部网络,使用服务名 'backend'
API_URL=http://backend:8083/chat API_URL=http://backend:8079/chat
# ⭐ 前端通信地址Docker 内部网络) # ⭐ 前端通信地址Docker 内部网络)
# 注意:这里只需要域名和端口,不需要 /chat 路径 # 注意:这里只需要域名和端口,不需要 /chat 路径
- API_URL=http://backend:8083 # API_URL=http://backend:8079
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# 应用行为配置 # 应用行为配置
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
MEMORY_SUMMARIZE_INTERVAL=10 MEMORY_SUMMARIZE_INTERVAL=10
# -----------------------------------------------------------------------------
# unstructured 库 spaCy 模型配置
# -----------------------------------------------------------------------------
# 指定文档解析使用的语言: eng (英语) 或 zho (中文)
UNSTRUCTURED_LANGUAGE=zho
# 指定 spaCy 模型名称(需与 UNSTRUCTURED_LANGUAGE 对应)
# eng -> en_core_web_sm
# zho -> zh_core_web_sm
SPACY_MODEL=zh_core_web_sm

View File

@@ -1,50 +1,89 @@
""" """
环境变量集中管理模块 环境变量集中管理模块
所有配置项统一定义,避免散落在各个文件中 所有配置项统一定义,避免散落在各个文件中
配置分组:相关配置放在一起(URL 和 API Key 配对)
所有配置直接从环境变量读取,无默认值,避免配置混乱
需要类型转换的配置在此处理
""" """
import os import os
# ========== Graph 执行追踪配置 ========== # ========== 辅助函数:类型转换 ==========
# 是否启用 Graph 流转追踪(通过环境变量控制) def _get_str(key: str) -> str | None:
ENABLE_GRAPH_TRACE = os.getenv("ENABLE_GRAPH_TRACE", "true").lower() == "true" """获取字符串配置"""
return os.getenv(key)
# ========== 记忆提取配置 ==========
# 记忆提取间隔:每 N 轮对话生成一次摘要
MEMORY_SUMMARIZE_INTERVAL = int(os.getenv("MEMORY_SUMMARIZE_INTERVAL", "10"))
# ========== Mem0 记忆层配置 ========== def _get_int(key: str) -> int | None:
# Qdrant 向量数据库地址 """获取整数配置,自动转换"""
QDRANT_URL = os.getenv("QDRANT_URL", "http://127.0.0.1:6333") value = os.getenv(key)
QDRANT_COLLECTION_NAME = os.getenv("QDRANT_COLLECTION_NAME", "mem0_user_memories") if value is not None:
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY", "your-qdrant-api-key") try:
return int(value)
except (ValueError, TypeError):
pass
return None
# ========== llm 配置 ==========
# LLM 模型配置
VLLM_BASE_URL = os.getenv("VLLM_BASE_URL", "http://127.0.0.1:8081/v1")
LLM_API_KEY = os.getenv("LLM_API_KEY", "your-ai-api-key")
# llama.cpp Embedding 服务地址 (用于 Mem0 的向量化) def _get_bool(key: str) -> bool | None:
LLAMACPP_EMBEDDING_URL = os.getenv("LLAMACPP_EMBEDDING_URL", "http://127.0.0.1:8082/v1") """获取布尔配置,自动转换"""
LLAMACPP_API_KEY = os.getenv("LLAMACPP_API_KEY", "your-llamacpp-api-key") value = os.getenv(key)
if value is not None:
return value.lower() in ("true", "1", "yes", "on")
return None
# ========== 后端服务配置 ==========
# 数据库连接字符串
DB_URI = os.getenv(
"DB_URI",
"postgresql://postgres:huang1998@ai-postgres:5432/langgraph_db?sslmode=disable"
)
# 后端服务端口
BACKEND_PORT = int(os.getenv("BACKEND_PORT", "8079"))
# ========== 日志配置 ==========
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
DEBUG = os.getenv("DEBUG", "false").lower() == "true"
# ========== Reranker 服务配置 ==========
LLAMACPP_RERANKER_URL = os.getenv("LLAMACPP_RERANKER_URL", "http://127.0.0.1:8083")
# ========== 第三方 API 密钥 ========== # ========== 第三方 API 密钥 ==========
ZHIPUAI_API_KEY = os.getenv("ZHIPUAI_API_KEY", "") ZHIPUAI_API_KEY = _get_str("ZHIPUAI_API_KEY")
DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY", "") DEEPSEEK_API_KEY = _get_str("DEEPSEEK_API_KEY")
# ========== llama.cpp 服务配置(URL + API密钥 配对) ==========
# 主 LLM 服务
VLLM_BASE_URL = _get_str("VLLM_BASE_URL")
LLM_API_KEY = _get_str("LLAMACPP_API_KEY")
# Embedding 服务 (用于 Mem0 的向量化)
LLAMACPP_EMBEDDING_URL = _get_str("LLAMACPP_EMBEDDING_URL")
LLAMACPP_API_KEY = _get_str("LLAMACPP_API_KEY")
# Reranker 服务
LLAMACPP_RERANKER_URL = _get_str("LLAMACPP_RERANKER_URL")
# ========== Qdrant 向量数据库配置(URL + API密钥 配对) ==========
QDRANT_URL = _get_str("QDRANT_URL")
QDRANT_API_KEY = _get_str("QDRANT_API_KEY")
QDRANT_COLLECTION_NAME = _get_str("QDRANT_COLLECTION_NAME")
# ========== PostgreSQL 数据库配置(分离配置 + 完整URI) ==========
# 分离配置(优先使用)
DB_HOST = _get_str("DB_HOST")
DB_PORT = _get_int("DB_PORT")
DB_USER = _get_str("DB_USER")
DB_PASSWORD = _get_str("DB_PASSWORD")
DB_NAME = _get_str("DB_NAME")
# 完整连接字符串(直接从环境变量读取)
DB_URI = _get_str("DB_URI")
# ========== 后端服务配置 ==========
BACKEND_PORT = _get_int("BACKEND_PORT")
# ========== Mem0 记忆层配置 ==========
# 记忆提取间隔:每 N 轮对话生成一次摘要
MEMORY_SUMMARIZE_INTERVAL = _get_int("MEMORY_SUMMARIZE_INTERVAL")
# ========== Graph 执行追踪配置 ==========
# 是否启用 Graph 流转追踪(通过环境变量控制)
ENABLE_GRAPH_TRACE = _get_bool("ENABLE_GRAPH_TRACE")
# ========== 日志配置 ==========
LOG_LEVEL = _get_str("LOG_LEVEL")
DEBUG = _get_bool("DEBUG")

View File

@@ -1,24 +1,54 @@
""" """
RAG Core 配置管理模块 RAG Core 配置管理模块
集中管理所有环境变量配置项,避免散落在各个文件中 集中管理所有环境变量配置项,避免散落在各个文件中
所有配置直接从环境变量读取,无默认值,避免配置混乱
需要类型转换的配置在此处理
""" """
import os import os
# ========== 向量数据库配置 ==========
QDRANT_URL = os.getenv("QDRANT_URL", "http://127.0.0.1:6333")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY", "")
# ========== 嵌入服务配置 ========== # ========== 辅助函数:类型转换 ==========
LLAMACPP_EMBEDDING_URL = os.getenv("LLAMACPP_EMBEDDING_URL", "http://127.0.0.1:8082") def _get_str(key: str) -> str | None:
LLAMACPP_API_KEY = os.getenv("LLAMACPP_API_KEY", "") """获取字符串配置"""
return os.getenv(key)
def _get_int(key: str) -> int | None:
"""获取整数配置,自动转换"""
value = os.getenv(key)
if value is not None:
try:
return int(value)
except (ValueError, TypeError):
pass
return None
# ========== 向量数据库配置(URL + API密钥 配对) ==========
QDRANT_URL = _get_str("QDRANT_URL")
QDRANT_API_KEY = _get_str("QDRANT_API_KEY")
# ========== 嵌入服务配置(URL + API密钥 配对) ==========
LLAMACPP_EMBEDDING_URL = _get_str("LLAMACPP_EMBEDDING_URL")
LLAMACPP_API_KEY = _get_str("LLAMACPP_API_KEY")
# ========== 文档存储配置(分离配置 + 完整URI) ==========
# 分离配置(优先使用)
DB_HOST = _get_str("DB_HOST")
DB_PORT = _get_int("DB_PORT")
DB_USER = _get_str("DB_USER")
DB_PASSWORD = _get_str("DB_PASSWORD")
DB_NAME = _get_str("DB_NAME")
# 完整连接字符串(直接从环境变量读取)
DB_URI = _get_str("DB_URI")
# 文档存储 URI(直接从环境变量读取,默认同 DB_URI)
DOCSTORE_URI = _get_str("DOCSTORE_URI") or DB_URI
# ========== 文档存储配置 ==========
DB_URI = os.getenv(
"DB_URI",
"postgresql://postgres:***@ai-postgres:5432/langgraph_db?sslmode=disable"
)
DOCSTORE_URI = os.getenv("DOCSTORE_URI", DB_URI)
# ========== 其他配置 ========== # ========== 其他配置 ==========
# 可以在此添加其他 RAG Core 专用的配置项 # 可以在此添加其他 RAG Core 专用的配置项

View File

@@ -5,7 +5,7 @@ WORKDIR /app
# ============================================================================= # =============================================================================
# 非敏感环境变量(固化在镜像中,无需通过 .env 配置) # 非敏感环境变量(固化在镜像中,无需通过 .env 配置)
# ============================================================================= # =============================================================================
ENV PYTHONPATH=/app:/app/backend ENV PYTHONPATH=/app
# llama.cpp 服务配置(本地部署标准端口) # llama.cpp 服务配置(本地部署标准端口)
ENV VLLM_BASE_URL=http://host.docker.internal:18000/v1 ENV VLLM_BASE_URL=http://host.docker.internal:18000/v1
@@ -19,10 +19,6 @@ ENV QDRANT_COLLECTION_NAME=mem0_user_memories
ENV MEMORY_SUMMARIZE_INTERVAL=10 ENV MEMORY_SUMMARIZE_INTERVAL=10
ENV ENABLE_GRAPH_TRACE=false ENV ENABLE_GRAPH_TRACE=false
# unstructured 库 spaCy 模型配置
ENV UNSTRUCTURED_LANGUAGE=eng
ENV SPACY_MODEL=en_core_web_sm
# 日志配置 # 日志配置
ENV LOG_LEVEL=WARNING ENV LOG_LEVEL=WARNING
ENV DEBUG=false ENV DEBUG=false

View File

@@ -1,7 +1,4 @@
services: services:
# ⭐ PostgreSQL 和 Qdrant 已迁移到远程服务器 (115.190.121.151)
# 不再需要在本地 Docker Compose 中运行这些服务
backend: backend:
build: build:
context: .. # 构建上下文为项目根目录 context: .. # 构建上下文为项目根目录
@@ -18,12 +15,18 @@ services:
- DEBUG=${DEBUG:-false} - DEBUG=${DEBUG:-false}
- ENABLE_GRAPH_TRACE=${ENABLE_GRAPH_TRACE:-false} - ENABLE_GRAPH_TRACE=${ENABLE_GRAPH_TRACE:-false}
# ⭐ 基础设施配置:固化在 compose 文件中 # ⭐ 基础设施配置:从 .env 读取敏感信息
# PostgreSQL 连接(远程服务器) # PostgreSQL 连接(远程服务器)- 分离凭据配置
- DB_URI=postgresql://postgres:huang1998@115.190.121.151:5432/langgraph_db?sslmode=disable - DB_HOST=115.190.121.151
- DB_PORT=5432
# Qdrant 向量数据库(远程服务器) - DB_USER=postgres
- DB_PASSWORD=${DB_PASSWORD}
- DB_NAME=langgraph_db
# Qdrant 向量数据库(远程服务器)- 配对配置
- QDRANT_URL=http://115.190.121.151:6333 - QDRANT_URL=http://115.190.121.151:6333
- QDRANT_API_KEY=${QDRANT_API_KEY}
- QDRANT_COLLECTION_NAME=mem0_user_memories
# 前端通信地址Docker 内部网络) # 前端通信地址Docker 内部网络)
- API_URL=http://backend:8079/chat - API_URL=http://backend:8079/chat

View File

@@ -1,6 +1,7 @@
""" """
前端配置管理模块 前端配置管理模块
集中管理所有配置项,支持环境变量覆盖 集中管理所有配置项,支持环境变量覆盖
需要类型转换的配置在此处理
""" """
import os import os
@@ -12,6 +13,31 @@ from dotenv import load_dotenv
load_dotenv() load_dotenv()
# ========== 辅助函数:类型转换 ==========
def _get_str(key: str) -> str | None:
"""获取字符串配置"""
return os.getenv(key)
def _get_int(key: str, default: int = 0) -> int:
    """Read an integer setting from the environment.

    Args:
        key: Name of the environment variable to read.
        default: Value returned when the variable is unset or cannot be
            parsed as an integer.

    Returns:
        The parsed integer, or ``default`` as described above.
    """
    raw = os.getenv(key)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        # Fall back to the default as before, but report the bad value so a
        # typo in the environment is visible instead of silently ignored.
        import logging

        logging.getLogger(__name__).warning(
            "Invalid integer %r for environment variable %s; using default %r",
            raw,
            key,
            default,
        )
        return default
def _get_bool(key: str, default: bool = False) -> bool:
    """Read a boolean setting from the environment.

    The values ``true``, ``1``, ``yes`` and ``on`` (case-insensitive,
    surrounding whitespace ignored) are treated as ``True``; any other value
    that is set is ``False``.

    Args:
        key: Name of the environment variable to read.
        default: Value returned when the variable is not set.

    Returns:
        The parsed boolean, or ``default`` when the variable is unset.
    """
    raw = os.getenv(key)
    if raw is None:
        return default
    # Strip first so a stray space or trailing newline does not turn an
    # intended "true" into False.
    return raw.strip().lower() in ("true", "1", "yes", "on")
@dataclass @dataclass
class FrontendConfig: class FrontendConfig:
"""前端配置类 - 统一管理所有配置项""" """前端配置类 - 统一管理所有配置项"""
@@ -19,51 +45,55 @@ class FrontendConfig:
# ==================== API 配置 ==================== # ==================== API 配置 ====================
api_base: str = "" api_base: str = ""
# ==================== 页面配置 ==================== # ==================== 页面配置(固定值,无需环境变量) ====================
page_title: str = "AI 个人助手" page_title: str = "AI 个人助手"
page_icon: str = "🤖" page_icon: str = "🤖"
layout: str = "wide" layout: str = "wide"
# ==================== 模型配置 ==================== # ==================== 模型配置(固定值,无需环境变量) ====================
default_model: str = "local" # 更改为local作为默认模型 default_model: str = "local"
model_options: Optional[dict] = None model_options: Optional[dict] = None
# ==================== 用户配置 ==================== # ==================== 用户配置(固定值,无需环境变量) ====================
default_user_id: str = "default_user" default_user_id: str = "default_user"
# ==================== 历史记录配置 ==================== # ==================== 历史记录配置(固定值,无需环境变量) ====================
history_limit: int = 50 history_limit: int = 50
summary_max_length: int = 30 summary_max_length: int = 30
# ==================== 流式响应配置 ==================== # ==================== 流式响应配置(固定值,无需环境变量) ====================
stream_timeout: int = 120 stream_timeout: int = 120
# ==================== 日志配置 ====================
log_level: str = ""
debug: bool = False
def __post_init__(self): def __post_init__(self):
"""初始化后处理 - 设置默认值和加载环境变量""" """初始化后处理 - 设置默认值和加载环境变量"""
if self.model_options is None: if self.model_options is None:
self.model_options = { self.model_options = {
"local": "本地 llama.cppGemma-4", # 本地模型作为第一个 "local": "本地 llama.cppGemma-4",
"deepseek": "DeepSeek V3.2(在线)", # DeepSeek 作为中间 "deepseek": "DeepSeek V3.2(在线)",
"zhipu": "智谱 GLM-4.7-Flash在线" # GLM-4.7 作为最后一个 "zhipu": "智谱 GLM-4.7-Flash在线"
} }
# 从环境变量加载配置 # 从环境变量加载配置(优先级最高)
self._load_from_env() self._load_from_env()
def _load_from_env(self): def _load_from_env(self):
"""从环境变量加载配置(优先级最高""" """从环境变量加载配置(仅加载必要的配置项"""
# API 地址(移除 /chat 后缀) # API 地址(移除 /chat 后缀)
# 优先级:环境变量 API_URL > 默认值 api_url = _get_str("API_URL")
api_url = os.getenv("API_URL", "http://127.0.0.1:8079") if api_url:
self.api_base = api_url.replace("/chat", "").rstrip("/") self.api_base = api_url.replace("/chat", "").rstrip("/")
# 日志配置 # 日志配置
self.log_level = os.getenv("LOG_LEVEL", "INFO").upper() log_level = _get_str("LOG_LEVEL")
self.debug = os.getenv("DEBUG", "false").lower() == "true" if log_level:
self.log_level = log_level.upper()
self.debug = _get_bool("DEBUG", False)
# 日志配置
self.log_level = os.getenv("LOG_LEVEL", "INFO").upper()
self.debug = os.getenv("DEBUG", "false").lower() == "true"
# 全局配置实例(单例模式) # 全局配置实例(单例模式)
config = FrontendConfig() config = FrontendConfig()

View File

@@ -26,8 +26,22 @@ Offline RAG Indexer module.
from .index_builder import IndexBuilder, IndexBuilderConfig, DocstoreConfig from .index_builder import IndexBuilder, IndexBuilderConfig, DocstoreConfig
from .loaders import DocumentLoader from .loaders import DocumentLoader
from .splitters import SplitterType, get_splitter from .splitters import SplitterType, get_splitter
from .config import (
QDRANT_URL,
QDRANT_API_KEY,
LLAMACPP_EMBEDDING_URL,
LLAMACPP_API_KEY,
DB_URI,
DOCSTORE_URI,
RAG_OCR_LANGUAGES,
RAG_DOC_LANGUAGES,
)
# 从 rag_core 重新导出常用组件 # 从 rag_core 重新导出常用组件
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent / "backend"))
from rag_core import ( from rag_core import (
LlamaCppEmbedder, LlamaCppEmbedder,
QdrantVectorStore, QdrantVectorStore,
@@ -39,7 +53,7 @@ __version__ = "2.0.0"
__all__ = [ __all__ = [
# 核心构建器与配置 # 核心构建器与配置
"index_builder", "IndexBuilder",
"IndexBuilderConfig", "IndexBuilderConfig",
"DocstoreConfig", "DocstoreConfig",
@@ -50,6 +64,16 @@ __all__ = [
"SplitterType", "SplitterType",
"get_splitter", "get_splitter",
# 配置
"QDRANT_URL",
"QDRANT_API_KEY",
"LLAMACPP_EMBEDDING_URL",
"LLAMACPP_API_KEY",
"DB_URI",
"DOCSTORE_URI",
"RAG_OCR_LANGUAGES",
"RAG_DOC_LANGUAGES",
# 嵌入与向量存储 # 嵌入与向量存储
"LlamaCppEmbedder", "LlamaCppEmbedder",
"QdrantVectorStore", "QdrantVectorStore",

View File

@@ -6,13 +6,24 @@ import asyncio
import logging import logging
import sys import sys
from pathlib import Path from pathlib import Path
from dotenv import load_dotenv
# 加载 .env 文件
load_dotenv()
# 添加项目根目录和 backend 目录到 Python 路径 # 添加项目根目录和 backend 目录到 Python 路径
sys.path.insert(0, str(Path(__file__).parent.parent)) sys.path.insert(0, str(Path(__file__).parent.parent))
sys.path.insert(0, str(Path(__file__).parent.parent / "backend")) sys.path.insert(0, str(Path(__file__).parent.parent / "backend"))
from .index_builder import IndexBuilder, IndexBuilderConfig # 导入方式:条件导入,支持作为脚本运行和作为包导入
from .splitters import SplitterType if __name__ == "__main__":
# 作为脚本直接运行时使用绝对导入
from rag_indexer.index_builder import IndexBuilder, IndexBuilderConfig
from rag_indexer.splitters import SplitterType
else:
# 作为包导入时使用相对导入
from .index_builder import IndexBuilder, IndexBuilderConfig
from .splitters import SplitterType
logging.basicConfig( logging.basicConfig(
level=logging.INFO, level=logging.INFO,

View File

@@ -1,32 +1,71 @@
""" """
RAG Indexer 配置管理模块 RAG Indexer 配置管理模块
集中管理所有环境变量配置项,避免散落在各个文件中 集中管理所有环境变量配置项,避免散落在各个文件中
所有配置直接从环境变量读取,无默认值,避免配置混乱
需要类型转换的配置在此处理
""" """
import os import os
# 尝试从 rag_core 导入配置(如果可用)
try: # ========== 辅助函数:类型转换 ==========
from rag_core.config import ( def _get_str(key: str) -> str | None:
QDRANT_URL, """获取字符串配置"""
QDRANT_API_KEY, return os.getenv(key)
LLAMACPP_EMBEDDING_URL,
LLAMACPP_API_KEY,
DB_URI, def _get_int(key: str) -> int | None:
DOCSTORE_URI, """获取整数配置,自动转换"""
) value = os.getenv(key)
except ImportError: if value is not None:
# 如果 rag_core 不可用,则直接读取环境变量 try:
QDRANT_URL = os.getenv("QDRANT_URL", "http://127.0.0.1:6333") return int(value)
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY", "") except (ValueError, TypeError):
LLAMACPP_EMBEDDING_URL = os.getenv("LLAMACPP_EMBEDDING_URL", "http://127.0.0.1:8082") pass
LLAMACPP_API_KEY = os.getenv("LLAMACPP_API_KEY", "") return None
DB_URI = os.getenv(
"DB_URI",
"postgresql://postgres:huang1998@ai-postgres:5432/langgraph_db?sslmode=disable" def _get_list_str(key: str, default: list[str] | None = None) -> list[str]:
) """获取字符串列表配置,从逗号分隔的字符串解析"""
DOCSTORE_URI = os.getenv("DOCSTORE_URI", DB_URI) value = os.getenv(key)
if value is not None:
return [item.strip() for item in value.split(",") if item.strip()]
return default or []
# ========== 向量数据库配置(URL + API密钥 配对) ==========
QDRANT_URL = _get_str("QDRANT_URL")
QDRANT_API_KEY = _get_str("QDRANT_API_KEY")
# ========== 嵌入服务配置(URL + API密钥 配对) ==========
LLAMACPP_EMBEDDING_URL = _get_str("LLAMACPP_EMBEDDING_URL")
LLAMACPP_API_KEY = _get_str("LLAMACPP_API_KEY")
# ========== 文档存储配置(分离配置 + 完整URI) ==========
# 分离配置(优先使用)
DB_HOST = _get_str("DB_HOST")
DB_PORT = _get_int("DB_PORT")
DB_USER = _get_str("DB_USER")
DB_PASSWORD = _get_str("DB_PASSWORD")
DB_NAME = _get_str("DB_NAME")
# 完整连接字符串(直接从环境变量读取)
DB_URI = _get_str("DB_URI")
# 文档存储 URI(直接从环境变量读取,默认同 DB_URI)
DOCSTORE_URI = _get_str("DOCSTORE_URI") or DB_URI
# ========== 文档加载器配置(unstructured 库) ==========
# OCR 语言列表(逗号分隔,如 "chi_sim,eng")
RAG_OCR_LANGUAGES = _get_list_str("RAG_OCR_LANGUAGES", ["chi_sim", "eng"])
# 文档主语言列表(逗号分隔,如 "zh")
RAG_DOC_LANGUAGES = _get_list_str("RAG_DOC_LANGUAGES", ["zh"])
# ========== 索引器专用配置 ========== # ========== 索引器专用配置 ==========
# 默认索引存储路径 # 默认索引存储路径
INDEX_STORAGE_PATH = os.getenv("INDEX_STORAGE_PATH", "./index_storage") INDEX_STORAGE_PATH = _get_str("INDEX_STORAGE_PATH")

View File

@@ -23,6 +23,12 @@ from qdrant_client.http.exceptions import ResponseHandlingException
from .loaders import DocumentLoader from .loaders import DocumentLoader
from .splitters import SplitterType, get_splitter from .splitters import SplitterType, get_splitter
# 从 rag_core 导入
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent / "backend"))
from rag_core import LlamaCppEmbedder, QdrantVectorStore, create_docstore, create_parent_retriever from rag_core import LlamaCppEmbedder, QdrantVectorStore, create_docstore, create_parent_retriever
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@@ -11,6 +11,9 @@ from langchain_core.documents import Document
from unstructured.documents.elements import Element from unstructured.documents.elements import Element
from unstructured.partition.auto import partition from unstructured.partition.auto import partition
# 相对导入配置
from .config import RAG_OCR_LANGUAGES, RAG_DOC_LANGUAGES
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# 模块加载时设置一次环境变量,避免重复设置 # 模块加载时设置一次环境变量,避免重复设置
@@ -47,8 +50,8 @@ class DocumentLoader:
""" """
self.extract_images = extract_images self.extract_images = extract_images
self.strategy = strategy self.strategy = strategy
self.ocr_languages = ocr_languages or ["chi_sim", "eng"] self.ocr_languages = ocr_languages or RAG_OCR_LANGUAGES
self.languages = languages or ["zh"] self.languages = languages or RAG_DOC_LANGUAGES
self.include_page_breaks = include_page_breaks self.include_page_breaks = include_page_breaks
self.pdf_infer_table_structure = pdf_infer_table_structure self.pdf_infer_table_structure = pdf_infer_table_structure
self.partition_kwargs = partition_kwargs or {} self.partition_kwargs = partition_kwargs or {}

69
test/test_frontend.py Normal file
View File

@@ -0,0 +1,69 @@
#!/usr/bin/env python3
"""
Frontend smoke-test script.

Checks that the frontend entry point, configuration, state management,
API client and UI components can all be imported. Exits with status 1 when
any check fails.
"""

import sys
import os

# This script lives in <project_root>/test/, so the project root is the parent
# of the directory that contains this file (not that directory itself).
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
frontend_src = os.path.join(project_root, "frontend", "src")
backend_dir = os.path.join(project_root, "backend")

# Make both the package-style import (frontend.src...) and the flat imports
# (config, state, api_client, components) resolvable.
sys.path.insert(0, project_root)
sys.path.insert(0, frontend_src)
sys.path.insert(0, backend_dir)

# Number of failed checks; decides the final banner and the exit status.
failed = 0

print("=" * 60)
print("前端导入测试")
print("=" * 60)

# Check 1: import the frontend entry module. Nothing else is attempted if
# this fails, so abort immediately.
print("\n[测试 1] 直接导入前端模块...")
try:
    from frontend.src.frontend_main import main
    print("✅ frontend_main 导入成功")
except Exception as e:
    print(f"❌ 导入失败: {e}")
    sys.exit(1)

# Check 2: import the configuration singleton.
print("\n[测试 2] 导入配置...")
try:
    from config import config
    print(f"✅ config 导入成功: page_title={config.page_title}")
except Exception as e:
    failed += 1
    print(f"❌ 导入失败: {e}")

# Check 3: import state management.
print("\n[测试 3] 导入状态管理...")
try:
    from state import AppState
    print("✅ AppState 导入成功")
except Exception as e:
    failed += 1
    print(f"❌ 导入失败: {e}")

# Check 4: import the API client.
print("\n[测试 4] 导入 API 客户端...")
try:
    from api_client import api_client
    print("✅ api_client 导入成功")
except Exception as e:
    failed += 1
    print(f"❌ 导入失败: {e}")

# Check 5: import the UI components.
print("\n[测试 5] 导入组件...")
try:
    from components.sidebar import render_sidebar
    from components.chat_area import render_chat_area
    from components.info_panel import render_info_panel
    print("✅ 所有组件导入成功")
except Exception as e:
    failed += 1
    print(f"❌ 导入失败: {e}")

print("\n" + "=" * 60)
if failed:
    # Do not claim success when any of checks 2-5 failed.
    print(f"❌ {failed} 项前端导入测试失败")
    print("=" * 60)
    sys.exit(1)

print("🎉 所有前端导入测试通过!")
print("=" * 60)
print("\n现在可以使用 ./scripts/start.sh both 启动完整服务")