容器处理
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -17,6 +17,8 @@
|
||||
!rag_indexer/**
|
||||
!docker/
|
||||
!docker/**
|
||||
!test/
|
||||
!test/**
|
||||
!.gitea/
|
||||
!.gitea/**
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
AI Agent 应用模块
|
||||
"""
|
||||
|
||||
from ..agent import AIAgentService
|
||||
from ..graph.graph_tools import AVAILABLE_TOOLS, TOOLS_BY_NAME
|
||||
from .agent.service import AIAgentService
|
||||
from .graph.graph_tools import AVAILABLE_TOOLS, TOOLS_BY_NAME
|
||||
|
||||
__all__ = ["AIAgentService", "AVAILABLE_TOOLS", "TOOLS_BY_NAME"]
|
||||
|
||||
@@ -5,7 +5,7 @@ WORKDIR /app
|
||||
# =============================================================================
|
||||
# 非敏感环境变量(固化在镜像中,无需通过 .env 配置)
|
||||
# =============================================================================
|
||||
ENV PYTHONPATH=/app
|
||||
ENV PYTHONPATH=/app:/app/backend
|
||||
|
||||
# llama.cpp 服务配置(本地部署标准端口)
|
||||
ENV VLLM_BASE_URL=http://host.docker.internal:18000/v1
|
||||
|
||||
@@ -12,10 +12,10 @@ COPY frontend/requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# 复制前端代码
|
||||
COPY frontend/src/ ./frontend/
|
||||
COPY frontend/src/ ./src/
|
||||
|
||||
# 暴露端口
|
||||
EXPOSE 8501
|
||||
|
||||
# 启动命令
|
||||
CMD ["streamlit", "run", "frontend/frontend_main.py", "--server.port", "8501", "--server.address", "0.0.0.0", "--server.baseUrlPath", "/ai"]
|
||||
CMD ["streamlit", "run", "src/frontend_main.py", "--server.port", "8501", "--server.address", "0.0.0.0", "--server.baseUrlPath", "/ai"]
|
||||
|
||||
30
frontend/run.py
Normal file
30
frontend/run.py
Normal file
@@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
前端启动包装器
|
||||
保持相对导入的同时,让 Streamlit 能正常运行
|
||||
本地和容器环境使用相同的启动方式
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
# 添加项目根目录和 backend 目录到 Python 路径
|
||||
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
backend_dir = os.path.join(project_root, "backend")
|
||||
sys.path.insert(0, project_root)
|
||||
sys.path.insert(0, backend_dir)
|
||||
|
||||
# 现在用正确的方式启动 Streamlit
|
||||
# 我们不直接运行 frontend_main.py,而是先加载它作为模块
|
||||
from streamlit.web import cli as stcli
|
||||
|
||||
# 设置工作目录到项目根
|
||||
os.chdir(project_root)
|
||||
|
||||
# 构建 Streamlit 参数
|
||||
frontend_main = os.path.join(project_root, "frontend", "src", "frontend_main.py")
|
||||
sys.argv = ["streamlit", "run", frontend_main, "--server.port", "8501", "--server.address", "0.0.0.0"]
|
||||
|
||||
# 启动 Streamlit
|
||||
if __name__ == "__main__":
|
||||
stcli.main()
|
||||
@@ -1,4 +1,5 @@
|
||||
"""
|
||||
UI 组件模块
|
||||
包含所有可复用的 Streamlit 组件
|
||||
"""
|
||||
"""
|
||||
|
||||
|
||||
@@ -6,18 +6,25 @@ AI Agent 前端主入口
|
||||
import sys
|
||||
import os
|
||||
|
||||
# 添加项目根目录到 Python 路径,支持绝对导入
|
||||
# 现在的结构: frontend/src/frontend_main.py,所以要获取 frontend/ 目录作为根
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
# 添加当前目录到路径,确保智能导入能工作
|
||||
src_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
sys.path.insert(0, src_dir)
|
||||
|
||||
import streamlit as st
|
||||
|
||||
# 使用相对导入
|
||||
from .config import config
|
||||
from .state import AppState
|
||||
from .components.sidebar import render_sidebar
|
||||
from .components.chat_area import render_chat_area
|
||||
from .components.info_panel import render_info_panel
|
||||
# 智能导入:作为 __main__ 被 Streamlit 运行时用绝对导入,否则用相对导入
|
||||
if __name__ == '__main__':
|
||||
from config import config
|
||||
from state import AppState
|
||||
from components.sidebar import render_sidebar
|
||||
from components.chat_area import render_chat_area
|
||||
from components.info_panel import render_info_panel
|
||||
else:
|
||||
from .config import config
|
||||
from .state import AppState
|
||||
from .components.sidebar import render_sidebar
|
||||
from .components.chat_area import render_chat_area
|
||||
from .components.info_panel import render_info_panel
|
||||
|
||||
|
||||
# =============================================================================
|
||||
|
||||
@@ -1,188 +0,0 @@
|
||||
"""
|
||||
验证 RAG 索引完整性。
|
||||
|
||||
检查 Qdrant 向量库、PostgreSQL 文档存储及检索功能。
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../.."))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
QDRANT_URL = os.getenv("QDRANT_URL", "http://127.0.0.1:6333")
|
||||
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
|
||||
DB_URI = os.getenv("DB_URI", "postgresql://postgres:huang1998@115.190.121.151:5432/langgraph_db?sslmode=disable")
|
||||
COLLECTION_NAME = "rag_documents"
|
||||
TABLE_NAME = "parent_documents"
|
||||
|
||||
|
||||
def check_qdrant():
|
||||
"""检查 Qdrant 向量库。"""
|
||||
from qdrant_client import QdrantClient
|
||||
|
||||
print("=" * 60)
|
||||
print("Qdrant 向量库")
|
||||
print("=" * 60)
|
||||
|
||||
client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
|
||||
|
||||
# 集合列表
|
||||
collections = client.get_collections().collections
|
||||
print(f"\n集合数: {len(collections)}")
|
||||
for c in collections:
|
||||
print(f" - {c.name}")
|
||||
|
||||
# 目标集合信息
|
||||
if not any(c.name == COLLECTION_NAME for c in collections):
|
||||
print(f"\n集合 '{COLLECTION_NAME}' 不存在")
|
||||
return
|
||||
|
||||
info = client.get_collection(COLLECTION_NAME)
|
||||
print(f"\n集合 '{COLLECTION_NAME}':")
|
||||
print(f" 状态: {info.status}")
|
||||
print(f" 向量数: {info.points_count}")
|
||||
|
||||
vectors_config = info.config.params.vectors
|
||||
if isinstance(vectors_config, dict):
|
||||
for name, vc in vectors_config.items():
|
||||
print(f" 向量 '{name}': 维度={vc.size}, 距离={vc.distance}")
|
||||
else:
|
||||
print(f" 向量维度: {vectors_config.size}")
|
||||
|
||||
# 抽样查看
|
||||
print(f"\n前 3 个向量:")
|
||||
points = client.scroll(
|
||||
collection_name=COLLECTION_NAME,
|
||||
limit=3,
|
||||
with_payload=True,
|
||||
with_vectors=False
|
||||
)
|
||||
for i, point in enumerate(points[0]):
|
||||
print(f"\n {i+1}. ID: {point.id}")
|
||||
payload = point.payload or {}
|
||||
print(f" 内容: {payload.get('page_content', '')[:100]}...")
|
||||
|
||||
|
||||
async def check_postgres():
|
||||
"""检查 PostgreSQL 文档存储。"""
|
||||
import asyncpg
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("PostgreSQL 文档存储")
|
||||
print("=" * 60)
|
||||
|
||||
conn = await asyncpg.connect(dsn=DB_URI)
|
||||
|
||||
try:
|
||||
# 表是否存在
|
||||
tables = await conn.fetch(
|
||||
"SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'"
|
||||
)
|
||||
table_names = [t['table_name'] for t in tables]
|
||||
|
||||
if TABLE_NAME not in table_names:
|
||||
print(f"\n表 '{TABLE_NAME}' 不存在")
|
||||
return
|
||||
|
||||
# 统计
|
||||
count = await conn.fetchval(f"SELECT COUNT(*) FROM {TABLE_NAME}")
|
||||
print(f"\n表 '{TABLE_NAME}': {count} 条记录")
|
||||
|
||||
# 抽样
|
||||
print(f"\n前 3 个文档:")
|
||||
rows = await conn.fetch(
|
||||
f"SELECT key, value FROM {TABLE_NAME} ORDER BY key LIMIT 3"
|
||||
)
|
||||
for i, row in enumerate(rows):
|
||||
print(f"\n {i+1}. Key: {row['key']}")
|
||||
val = row['value']
|
||||
if isinstance(val, dict) and 'page_content' in val:
|
||||
print(f" 内容: {val['page_content'][:100]}...")
|
||||
|
||||
# Key 前缀分布
|
||||
key_prefixes = await conn.fetch(
|
||||
f"""
|
||||
SELECT
|
||||
CASE
|
||||
WHEN key LIKE '%:%' THEN split_part(key, ':', 1)
|
||||
ELSE 'no_prefix'
|
||||
END AS prefix,
|
||||
COUNT(*) AS cnt
|
||||
FROM {TABLE_NAME}
|
||||
GROUP BY prefix
|
||||
ORDER BY cnt DESC
|
||||
LIMIT 10
|
||||
"""
|
||||
)
|
||||
print(f"\nKey 前缀分布:")
|
||||
for row in key_prefixes:
|
||||
print(f" {row['prefix']}: {row['cnt']}")
|
||||
|
||||
finally:
|
||||
await conn.close()
|
||||
|
||||
|
||||
async def test_search():
|
||||
"""测试检索功能。"""
|
||||
from rag_indexer.index_builder import IndexBuilder, IndexBuilderConfig
|
||||
from rag_indexer.splitters import SplitterType
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("检索测试")
|
||||
print("=" * 60)
|
||||
|
||||
# 使用配置对象初始化(与默认构建方式一致)
|
||||
config = IndexBuilderConfig(
|
||||
collection_name=COLLECTION_NAME,
|
||||
splitter_type=SplitterType.PARENT_CHILD,
|
||||
)
|
||||
builder = IndexBuilder(config)
|
||||
|
||||
# 确保检索器已初始化
|
||||
if builder.retriever is None:
|
||||
print("错误: 检索器未初始化,请检查切分策略")
|
||||
return
|
||||
|
||||
query = input("\n查询 (回车使用默认): ").strip() or "你好"
|
||||
print(f"\n查询: {query}")
|
||||
|
||||
# 标准检索(返回父块,因为 ParentDocumentRetriever 默认返回父块)
|
||||
print("\n--- 标准检索 (返回父块) ---")
|
||||
results = await builder.retriever.ainvoke(query)
|
||||
for i, doc in enumerate(results):
|
||||
content = doc.page_content[:200] if hasattr(doc, 'page_content') else str(doc)[:200]
|
||||
print(f"\n {i+1}. {content}...")
|
||||
if hasattr(doc, 'metadata'):
|
||||
source = doc.metadata.get('source', '')
|
||||
if source:
|
||||
print(f" 来源: {source}")
|
||||
|
||||
# 若需要仅返回子块,可以临时修改检索器的 search_type
|
||||
# (注意:ParentDocumentRetriever 的 search_type 默认为 "similarity")
|
||||
print("\n--- 检索子块 (通过修改检索器参数) ---")
|
||||
# 创建一个新的检索器副本,设置为返回子块
|
||||
# 简单起见,直接调用 vectorstore 进行相似度搜索获取子块
|
||||
vectorstore = builder.vector_store.get_langchain_vectorstore()
|
||||
sub_results = await vectorstore.asimilarity_search(query, k=3)
|
||||
for i, doc in enumerate(sub_results):
|
||||
content = doc.page_content[:200] if hasattr(doc, 'page_content') else str(doc)[:200]
|
||||
print(f"\n {i+1}. {content}...")
|
||||
if hasattr(doc, 'metadata'):
|
||||
parent_id = doc.metadata.get('parent_id', '')
|
||||
if parent_id:
|
||||
print(f" 父块 ID: {parent_id}")
|
||||
|
||||
|
||||
async def main():
|
||||
check_qdrant()
|
||||
await check_postgres()
|
||||
await test_search()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -290,9 +290,9 @@ start_backend() {
|
||||
source .env 2>/dev/null || true
|
||||
set +a
|
||||
|
||||
export PYTHONPATH="$PROJECT_DIR"
|
||||
export PYTHONPATH="$PROJECT_DIR:$PROJECT_DIR/backend"
|
||||
export BACKEND_PORT=8079
|
||||
python app/backend.py &
|
||||
python backend/app/backend.py &
|
||||
BACKEND_PID=$!
|
||||
echo -e "${GREEN}✓ 后端服务已启动 (PID: $BACKEND_PID)${NC}"
|
||||
sleep 2
|
||||
@@ -307,7 +307,7 @@ start_frontend() {
|
||||
source .env 2>/dev/null || true
|
||||
set +a
|
||||
|
||||
export PYTHONPATH="$PROJECT_DIR"
|
||||
export PYTHONPATH="$PROJECT_DIR:$PROJECT_DIR/backend"
|
||||
streamlit run frontend/src/frontend_main.py &
|
||||
FRONTEND_PID=$!
|
||||
echo -e "${GREEN}✓ 前端服务已启动 (PID: $FRONTEND_PID)${NC}"
|
||||
|
||||
@@ -6,20 +6,22 @@
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from .config import DB_URI
|
||||
import sys
|
||||
import uuid
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# 添加项目根目录到 Python 路径 (现在文件在 backend/app/ 下,backend 就是根)
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||
|
||||
# 添加项目根目录和 backend 目录到 Python 路径
|
||||
project_root = os.path.join(os.path.dirname(__file__), "..")
|
||||
backend_dir = os.path.join(project_root, "backend")
|
||||
sys.path.insert(0, project_root)
|
||||
sys.path.insert(0, backend_dir)
|
||||
load_dotenv()
|
||||
|
||||
from backend.app.config import DB_URI
|
||||
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
|
||||
from ..agent import AIAgentService
|
||||
from ..agent.history import ThreadHistoryService
|
||||
from ..logger import info, warning, error
|
||||
from backend.app.agent.service import AIAgentService
|
||||
from backend.app.agent.history import ThreadHistoryService
|
||||
from backend.app.logger import info, warning, error
|
||||
|
||||
# PostgreSQL 连接字符串
|
||||
|
||||
@@ -5,10 +5,15 @@ import sys
|
||||
import numpy as np
|
||||
from dotenv import load_dotenv
|
||||
from qdrant_client import QdrantClient
|
||||
from backend.rag_core import LlamaCppEmbedder
|
||||
|
||||
# 添加项目根目录和 backend 目录到 Python 路径
|
||||
project_root = os.path.join(os.path.dirname(__file__), "..")
|
||||
backend_dir = os.path.join(project_root, "backend")
|
||||
sys.path.insert(0, project_root)
|
||||
sys.path.insert(0, backend_dir)
|
||||
load_dotenv()
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../.."))
|
||||
|
||||
from rag_core import LlamaCppEmbedder
|
||||
|
||||
QDRANT_URL = os.getenv("QDRANT_URL", "http://127.0.0.1:6333")
|
||||
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
|
||||
@@ -8,10 +8,11 @@ import os
|
||||
import sys
|
||||
|
||||
# 添加项目根目录到 Python 路径
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||
project_root = os.path.join(os.path.dirname(__file__), "..")
|
||||
sys.path.insert(0, project_root)
|
||||
|
||||
from ..index_builder import IndexBuilder
|
||||
from ..splitters import SplitterType
|
||||
from rag_indexer.index_builder import IndexBuilder
|
||||
from rag_indexer.splitters import SplitterType
|
||||
|
||||
async def test_index_builder():
|
||||
"""测试索引构建功能"""
|
||||
@@ -26,7 +27,7 @@ async def test_index_builder():
|
||||
)
|
||||
|
||||
# 测试文档路径
|
||||
test_file = os.path.join(os.path.dirname(__file__), "..", "data", "corpus", "三国演义.txt")
|
||||
test_file = os.path.join(os.path.dirname(__file__), "..", "data", "user_docs", "a.txt")
|
||||
|
||||
if os.path.exists(test_file):
|
||||
# 构建索引
|
||||
Reference in New Issue
Block a user