容器处理

This commit is contained in:
2026-04-21 16:27:05 +08:00
parent 8b354b7ccc
commit 08826c70a3
13 changed files with 80 additions and 220 deletions

2
.gitignore vendored
View File

@@ -17,6 +17,8 @@
!rag_indexer/**
!docker/
!docker/**
!test/
!test/**
!.gitea/
!.gitea/**

View File

@@ -2,7 +2,7 @@
AI Agent 应用模块
"""
from ..agent import AIAgentService
from ..graph.graph_tools import AVAILABLE_TOOLS, TOOLS_BY_NAME
from .agent.service import AIAgentService
from .graph.graph_tools import AVAILABLE_TOOLS, TOOLS_BY_NAME
__all__ = ["AIAgentService", "AVAILABLE_TOOLS", "TOOLS_BY_NAME"]

View File

@@ -5,7 +5,7 @@ WORKDIR /app
# =============================================================================
# 非敏感环境变量(固化在镜像中,无需通过 .env 配置)
# =============================================================================
ENV PYTHONPATH=/app
ENV PYTHONPATH=/app:/app/backend
# llama.cpp 服务配置(本地部署标准端口)
ENV VLLM_BASE_URL=http://host.docker.internal:18000/v1

View File

@@ -12,10 +12,10 @@ COPY frontend/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# 复制前端代码
COPY frontend/src/ ./frontend/
COPY frontend/src/ ./src/
# 暴露端口
EXPOSE 8501
# 启动命令
CMD ["streamlit", "run", "frontend/frontend_main.py", "--server.port", "8501", "--server.address", "0.0.0.0", "--server.baseUrlPath", "/ai"]
CMD ["streamlit", "run", "src/frontend_main.py", "--server.port", "8501", "--server.address", "0.0.0.0", "--server.baseUrlPath", "/ai"]

30
frontend/run.py Normal file
View File

@@ -0,0 +1,30 @@
#!/usr/bin/env python3
"""
Frontend launch wrapper.

Puts the project root and the ``backend`` directory on ``sys.path`` and then
starts Streamlit on ``frontend/src/frontend_main.py``, so that local runs and
container runs are started the same way.
"""
import sys
import os

# Make the project root and the backend directory importable.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
backend_dir = os.path.join(project_root, "backend")
sys.path.insert(0, project_root)
sys.path.insert(0, backend_dir)

# Script that is handed to ``streamlit run``.
frontend_main = os.path.join(project_root, "frontend", "src", "frontend_main.py")


def main():
    """Start Streamlit on the frontend entry script.

    The working directory and ``sys.argv`` are modified only here, so merely
    importing this module does not change process-wide state.
    """
    # Imported lazily: only an actual launch needs Streamlit to be installed.
    from streamlit.web import cli as stcli

    # Run from the project root.
    os.chdir(project_root)

    # stcli.main() is called without arguments, so the command line is
    # supplied through sys.argv.
    sys.argv = ["streamlit", "run", frontend_main, "--server.port", "8501", "--server.address", "0.0.0.0"]
    stcli.main()


if __name__ == "__main__":
    main()

View File

@@ -1,4 +1,5 @@
"""
UI 组件模块
包含所有可复用的 Streamlit 组件
"""
"""

View File

@@ -6,18 +6,25 @@ AI Agent 前端主入口
import sys
import os
# 添加项目根目录到 Python 路径,支持绝对导入
# 现在的结构: frontend/src/frontend_main.py,所以要获取 frontend/ 目录作为根
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# 添加当前目录到路径,确保智能导入能工作
src_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, src_dir)
import streamlit as st
# 使用相对导入
from .config import config
from .state import AppState
from .components.sidebar import render_sidebar
from .components.chat_area import render_chat_area
from .components.info_panel import render_info_panel
# 智能导入:作为 __main__ 被 Streamlit 运行时用绝对导入,否则用相对导入
if __name__ == '__main__':
from config import config
from state import AppState
from components.sidebar import render_sidebar
from components.chat_area import render_chat_area
from components.info_panel import render_info_panel
else:
from .config import config
from .state import AppState
from .components.sidebar import render_sidebar
from .components.chat_area import render_chat_area
from .components.info_panel import render_info_panel
# =============================================================================

View File

@@ -1,188 +0,0 @@
"""
验证 RAG 索引完整性。
检查 Qdrant 向量库、PostgreSQL 文档存储及检索功能。
"""
import asyncio
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../.."))
from dotenv import load_dotenv
load_dotenv()
# Connection settings; each one can be overridden through the environment
# (or the .env file loaded above).
QDRANT_URL = os.getenv("QDRANT_URL", "http://127.0.0.1:6333")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
# SECURITY: the fallback must never embed a real host or password. Supply the
# real connection string via DB_URI. The credential that used to be hard-coded
# here has been committed to history and should be rotated.
DB_URI = os.getenv("DB_URI", "postgresql://postgres@127.0.0.1:5432/langgraph_db?sslmode=disable")
# Qdrant collection and PostgreSQL table inspected by the checks below.
COLLECTION_NAME = "rag_documents"
TABLE_NAME = "parent_documents"
def check_qdrant():
    """Print a summary of the Qdrant vector store.

    Lists every collection, then for ``COLLECTION_NAME`` prints its status,
    point count and vector configuration, followed by a sample of up to three
    stored points (payload only). Returns early when the target collection
    does not exist. The client is closed before returning.
    """
    from qdrant_client import QdrantClient

    print("=" * 60)
    print("Qdrant 向量库")
    print("=" * 60)

    client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
    try:
        # All collections on the server.
        collections = client.get_collections().collections
        print(f"\n集合数: {len(collections)}")
        for c in collections:
            print(f" - {c.name}")

        # Details of the target collection.
        if not any(c.name == COLLECTION_NAME for c in collections):
            print(f"\n集合 '{COLLECTION_NAME}' 不存在")
            return

        info = client.get_collection(COLLECTION_NAME)
        print(f"\n集合 '{COLLECTION_NAME}':")
        print(f" 状态: {info.status}")
        print(f" 向量数: {info.points_count}")

        vectors_config = info.config.params.vectors
        if isinstance(vectors_config, dict):
            # Named vectors: one entry per vector name.
            for name, vc in vectors_config.items():
                print(f" 向量 '{name}': 维度={vc.size}, 距离={vc.distance}")
        elif vectors_config is not None:
            # Single unnamed vector. A missing config is skipped instead of
            # failing on ``.size``.
            print(f" 向量维度: {vectors_config.size}")

        # Sample a few points.
        print("\n前 3 个向量:")
        sample, _next_offset = client.scroll(
            collection_name=COLLECTION_NAME,
            limit=3,
            with_payload=True,
            with_vectors=False,
        )
        for i, point in enumerate(sample):
            print(f"\n {i+1}. ID: {point.id}")
            payload = point.payload or {}
            # page_content may be absent or None; never slice a non-string.
            content = str(payload.get('page_content') or '')
            print(f" 内容: {content[:100]}...")
    finally:
        client.close()
def _page_content_of(value):
    """Return the ``page_content`` held by a stored row value, or ``None``.

    ``value`` may already be a dict, or JSON text/bytes that decodes to one.
    Anything else yields ``None``.
    """
    import json

    if isinstance(value, (str, bytes, bytearray)):
        # NOTE(review): if the column is json/jsonb, asyncpg returns it as text
        # unless a codec is registered - confirm against the table schema.
        try:
            value = json.loads(value)
        except ValueError:  # not JSON (covers JSONDecodeError / UnicodeDecodeError)
            return None
    if isinstance(value, dict) and 'page_content' in value:
        return value['page_content']
    return None


async def check_postgres():
    """Print a summary of the PostgreSQL document store.

    Connects with ``DB_URI``, verifies that ``TABLE_NAME`` exists in the
    ``public`` schema, then prints the row count, the first three rows ordered
    by key, and the ten most common key prefixes (the text before the first
    ``:``). The connection is always closed.
    """
    import asyncpg

    print("\n" + "=" * 60)
    print("PostgreSQL 文档存储")
    print("=" * 60)

    conn = await asyncpg.connect(dsn=DB_URI)
    try:
        # Does the table exist?
        tables = await conn.fetch(
            "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'"
        )
        table_names = [t['table_name'] for t in tables]
        if TABLE_NAME not in table_names:
            print(f"\n表 '{TABLE_NAME}' 不存在")
            return

        # TABLE_NAME is a module constant, not user input; identifiers cannot
        # be passed as query parameters, hence the f-strings below.
        count = await conn.fetchval(f"SELECT COUNT(*) FROM {TABLE_NAME}")
        print(f"\n表 '{TABLE_NAME}': {count} 条记录")

        # Sample rows.
        print("\n前 3 个文档:")
        rows = await conn.fetch(
            f"SELECT key, value FROM {TABLE_NAME} ORDER BY key LIMIT 3"
        )
        for i, row in enumerate(rows):
            print(f"\n {i+1}. Key: {row['key']}")
            content = _page_content_of(row['value'])
            if content is not None:
                print(f" 内容: {str(content)[:100]}...")

        # Distribution of key prefixes.
        key_prefixes = await conn.fetch(
            f"""
            SELECT
                CASE
                    WHEN key LIKE '%:%' THEN split_part(key, ':', 1)
                    ELSE 'no_prefix'
                END AS prefix,
                COUNT(*) AS cnt
            FROM {TABLE_NAME}
            GROUP BY prefix
            ORDER BY cnt DESC
            LIMIT 10
            """
        )
        print("\nKey 前缀分布:")
        for row in key_prefixes:
            print(f" {row['prefix']}: {row['cnt']}")
    finally:
        await conn.close()
def _print_hits(docs, meta_key, meta_label):
    """Print up to 200 characters of each document plus one metadata field."""
    for i, doc in enumerate(docs):
        content = doc.page_content[:200] if hasattr(doc, 'page_content') else str(doc)[:200]
        print(f"\n {i+1}. {content}...")
        if hasattr(doc, 'metadata'):
            value = doc.metadata.get(meta_key, '')
            if value:
                print(f" {meta_label}: {value}")


async def test_search():
    """Run an interactive retrieval smoke test.

    Builds an ``IndexBuilder`` for ``COLLECTION_NAME`` with the parent/child
    splitter, reads a query from stdin (falling back to a default when the
    answer is empty or stdin is closed), and prints the retriever results
    followed by the top three raw vector-store hits.
    """
    from rag_indexer.index_builder import IndexBuilder, IndexBuilderConfig
    from rag_indexer.splitters import SplitterType

    print("\n" + "=" * 60)
    print("检索测试")
    print("=" * 60)

    # Initialise through the config object, as the default build path does.
    config = IndexBuilderConfig(
        collection_name=COLLECTION_NAME,
        splitter_type=SplitterType.PARENT_CHILD,
    )
    builder = IndexBuilder(config)

    # The retriever must be available before searching.
    if builder.retriever is None:
        print("错误: 检索器未初始化,请检查切分策略")
        return

    # input() raises EOFError when stdin is closed (e.g. a non-interactive
    # container run); use the default query in that case.
    try:
        typed = input("\n查询 (回车使用默认): ").strip()
    except EOFError:
        typed = ""
    query = typed or "你好"
    print(f"\n查询: {query}")

    # Standard retrieval through the retriever (expected to return parent chunks).
    print("\n--- 标准检索 (返回父块) ---")
    results = await builder.retriever.ainvoke(query)
    _print_hits(results, 'source', "来源")

    # Child chunks: rather than reconfiguring the retriever, query the
    # underlying vector store directly with a similarity search.
    print("\n--- 检索子块 (通过修改检索器参数) ---")
    vectorstore = builder.vector_store.get_langchain_vectorstore()
    sub_results = await vectorstore.asimilarity_search(query, k=3)
    _print_hits(sub_results, 'parent_id', "父块 ID")
async def main():
    """Run every verification step in order: Qdrant, PostgreSQL, retrieval."""
    # The Qdrant check is a plain synchronous call.
    check_qdrant()
    # The remaining steps are coroutines, awaited one after another.
    for step in (check_postgres, test_search):
        await step()


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -290,9 +290,9 @@ start_backend() {
source .env 2>/dev/null || true
set +a
export PYTHONPATH="$PROJECT_DIR"
export PYTHONPATH="$PROJECT_DIR:$PROJECT_DIR/backend"
export BACKEND_PORT=8079
python app/backend.py &
python backend/app/backend.py &
BACKEND_PID=$!
echo -e "${GREEN}✓ 后端服务已启动 (PID: $BACKEND_PID)${NC}"
sleep 2
@@ -307,7 +307,7 @@ start_frontend() {
source .env 2>/dev/null || true
set +a
export PYTHONPATH="$PROJECT_DIR"
export PYTHONPATH="$PROJECT_DIR:$PROJECT_DIR/backend"
streamlit run frontend/src/frontend_main.py &
FRONTEND_PID=$!
echo -e "${GREEN}✓ 前端服务已启动 (PID: $FRONTEND_PID)${NC}"

View File

@@ -6,20 +6,22 @@
import asyncio
import os
from .config import DB_URI
import sys
import uuid
from dotenv import load_dotenv
# 添加项目根目录到 Python 路径 (现在文件在 backend/app/ 下,backend 就是根)
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
# 添加项目根目录和 backend 目录到 Python 路径
project_root = os.path.join(os.path.dirname(__file__), "..")
backend_dir = os.path.join(project_root, "backend")
sys.path.insert(0, project_root)
sys.path.insert(0, backend_dir)
load_dotenv()
from backend.app.config import DB_URI
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
from ..agent import AIAgentService
from ..agent.history import ThreadHistoryService
from ..logger import info, warning, error
from backend.app.agent.service import AIAgentService
from backend.app.agent.history import ThreadHistoryService
from backend.app.logger import info, warning, error
# PostgreSQL 连接字符串

View File

@@ -5,10 +5,15 @@ import sys
import numpy as np
from dotenv import load_dotenv
from qdrant_client import QdrantClient
from backend.rag_core import LlamaCppEmbedder
# 添加项目根目录和 backend 目录到 Python 路径
project_root = os.path.join(os.path.dirname(__file__), "..")
backend_dir = os.path.join(project_root, "backend")
sys.path.insert(0, project_root)
sys.path.insert(0, backend_dir)
load_dotenv()
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../.."))
from rag_core import LlamaCppEmbedder
QDRANT_URL = os.getenv("QDRANT_URL", "http://127.0.0.1:6333")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")

View File

@@ -8,10 +8,11 @@ import os
import sys
# 添加项目根目录到 Python 路径
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
project_root = os.path.join(os.path.dirname(__file__), "..")
sys.path.insert(0, project_root)
from ..index_builder import IndexBuilder
from ..splitters import SplitterType
from rag_indexer.index_builder import IndexBuilder
from rag_indexer.splitters import SplitterType
async def test_index_builder():
"""测试索引构建功能"""
@@ -26,7 +27,7 @@ async def test_index_builder():
)
# 测试文档路径
test_file = os.path.join(os.path.dirname(__file__), "..", "data", "corpus", "三国演义.txt")
test_file = os.path.join(os.path.dirname(__file__), "..", "data", "user_docs", "a.txt")
if os.path.exists(test_file):
# 构建索引