diff --git a/.gitignore b/.gitignore index 4120caa..e9074b3 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,8 @@ !rag_indexer/** !docker/ !docker/** +!test/ +!test/** !.gitea/ !.gitea/** diff --git a/backend/app/__init__.py b/backend/app/__init__.py index c32b8e6..2bd75d5 100644 --- a/backend/app/__init__.py +++ b/backend/app/__init__.py @@ -2,7 +2,7 @@ AI Agent 应用模块 """ -from ..agent import AIAgentService -from ..graph.graph_tools import AVAILABLE_TOOLS, TOOLS_BY_NAME +from .agent.service import AIAgentService +from .graph.graph_tools import AVAILABLE_TOOLS, TOOLS_BY_NAME __all__ = ["AIAgentService", "AVAILABLE_TOOLS", "TOOLS_BY_NAME"] diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 0caf150..0d14d0d 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -5,7 +5,7 @@ WORKDIR /app # ============================================================================= # 非敏感环境变量(固化在镜像中,无需通过 .env 配置) # ============================================================================= -ENV PYTHONPATH=/app +ENV PYTHONPATH=/app:/app/backend # llama.cpp 服务配置(本地部署标准端口) ENV VLLM_BASE_URL=http://host.docker.internal:18000/v1 diff --git a/docker/frontend/Dockerfile b/docker/frontend/Dockerfile index 0b4adc4..ef1c547 100644 --- a/docker/frontend/Dockerfile +++ b/docker/frontend/Dockerfile @@ -12,10 +12,10 @@ COPY frontend/requirements.txt . RUN pip install --no-cache-dir -r requirements.txt # 复制前端代码 -COPY frontend/src/ ./frontend/ +COPY frontend/src/ ./src/ # 暴露端口 EXPOSE 8501 # 启动命令 -CMD ["streamlit", "run", "frontend/frontend_main.py", "--server.port", "8501", "--server.address", "0.0.0.0", "--server.baseUrlPath", "/ai"] +CMD ["streamlit", "run", "src/frontend_main.py", "--server.port", "8501", "--server.address", "0.0.0.0", "--server.baseUrlPath", "/ai"] diff --git a/frontend/run.py b/frontend/run.py new file mode 100644 index 0000000..e59fbee --- /dev/null +++ b/frontend/run.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +""" +前端启动包装器 +保持相对导入的同时,让 Streamlit 能正常运行 +本地和容器环境使用相同的启动方式 +""" + +import sys +import os + +# 添加项目根目录和 backend 目录到 Python 路径 +project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +backend_dir = os.path.join(project_root, "backend") +sys.path.insert(0, project_root) +sys.path.insert(0, backend_dir) + +# 现在用正确的方式启动 Streamlit +# 我们不直接运行 frontend_main.py,而是先加载它作为模块 +from streamlit.web import cli as stcli + +# 设置工作目录到项目根 +os.chdir(project_root) + +# 构建 Streamlit 参数 +frontend_main = os.path.join(project_root, "frontend", "src", "frontend_main.py") +sys.argv = ["streamlit", "run", frontend_main, "--server.port", "8501", "--server.address", "0.0.0.0"] + +# 启动 Streamlit +if __name__ == "__main__": + stcli.main() diff --git a/frontend/src/components/__init__.py b/frontend/src/components/__init__.py index 64baaad..bf24dcc 100644 --- a/frontend/src/components/__init__.py +++ b/frontend/src/components/__init__.py @@ -1,4 +1,5 @@ """ UI 组件模块 包含所有可复用的 Streamlit 组件 -""" \ No newline at end of file +""" + diff --git a/frontend/src/frontend_main.py b/frontend/src/frontend_main.py index 508b42c..9eefae2 100644 --- a/frontend/src/frontend_main.py +++ b/frontend/src/frontend_main.py @@ -6,18 +6,25 @@ AI Agent 前端主入口 import sys import os -# 添加项目根目录到 Python 路径,支持绝对导入 -# 现在的结构: frontend/src/frontend_main.py,所以要获取 frontend/ 目录作为根 -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +# 添加当前目录到路径,确保智能导入能工作 +src_dir = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, src_dir) import streamlit as st -# 使用相对导入 -from .config import config -from .state import AppState -from .components.sidebar import render_sidebar -from .components.chat_area import render_chat_area -from .components.info_panel import render_info_panel +# 智能导入:作为 __main__ 被 Streamlit 运行时用绝对导入,否则用相对导入 +if __name__ == '__main__': + from config import config + from state import AppState + from components.sidebar import render_sidebar + from components.chat_area import render_chat_area + from components.info_panel import render_info_panel +else: + from .config import config + from .state import AppState + from .components.sidebar import render_sidebar + from .components.chat_area import render_chat_area + from .components.info_panel import render_info_panel # ============================================================================= diff --git a/rag_indexer/test/reset_index.py b/rag_indexer/clear_qdrant.py similarity index 100% rename from rag_indexer/test/reset_index.py rename to rag_indexer/clear_qdrant.py diff --git a/rag_indexer/test/test_validate_index.py b/rag_indexer/test/test_validate_index.py deleted file mode 100644 index 65017eb..0000000 --- a/rag_indexer/test/test_validate_index.py +++ /dev/null @@ -1,188 +0,0 @@ -""" -验证 RAG 索引完整性。 - -检查 Qdrant 向量库、PostgreSQL 文档存储及检索功能。 -""" - -import asyncio -import os -import sys - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../..")) - -from dotenv import load_dotenv -load_dotenv() - -QDRANT_URL = os.getenv("QDRANT_URL", "http://127.0.0.1:6333") -QDRANT_API_KEY = os.getenv("QDRANT_API_KEY") -DB_URI = os.getenv("DB_URI", "postgresql://postgres:huang1998@115.190.121.151:5432/langgraph_db?sslmode=disable") -COLLECTION_NAME = "rag_documents" -TABLE_NAME = "parent_documents" - - -def check_qdrant(): - """检查 Qdrant 向量库。""" - from qdrant_client import QdrantClient - - print("=" * 60) - print("Qdrant 向量库") - print("=" * 60) - - client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY) - - # 集合列表 - collections = client.get_collections().collections - print(f"\n集合数: {len(collections)}") - for c in collections: - print(f" - {c.name}") - - # 目标集合信息 - if not any(c.name == COLLECTION_NAME for c in collections): - print(f"\n集合 '{COLLECTION_NAME}' 不存在") - return - - info = client.get_collection(COLLECTION_NAME) - print(f"\n集合 '{COLLECTION_NAME}':") - print(f" 状态: {info.status}") - print(f" 向量数: {info.points_count}") - - vectors_config = info.config.params.vectors - if isinstance(vectors_config, dict): - for name, vc in vectors_config.items(): - print(f" 向量 '{name}': 维度={vc.size}, 距离={vc.distance}") - else: - print(f" 向量维度: {vectors_config.size}") - - # 抽样查看 - print(f"\n前 3 个向量:") - points = client.scroll( - collection_name=COLLECTION_NAME, - limit=3, - with_payload=True, - with_vectors=False - ) - for i, point in enumerate(points[0]): - print(f"\n {i+1}. ID: {point.id}") - payload = point.payload or {} - print(f" 内容: {payload.get('page_content', '')[:100]}...") - - -async def check_postgres(): - """检查 PostgreSQL 文档存储。""" - import asyncpg - - print("\n" + "=" * 60) - print("PostgreSQL 文档存储") - print("=" * 60) - - conn = await asyncpg.connect(dsn=DB_URI) - - try: - # 表是否存在 - tables = await conn.fetch( - "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'" - ) - table_names = [t['table_name'] for t in tables] - - if TABLE_NAME not in table_names: - print(f"\n表 '{TABLE_NAME}' 不存在") - return - - # 统计 - count = await conn.fetchval(f"SELECT COUNT(*) FROM {TABLE_NAME}") - print(f"\n表 '{TABLE_NAME}': {count} 条记录") - - # 抽样 - print(f"\n前 3 个文档:") - rows = await conn.fetch( - f"SELECT key, value FROM {TABLE_NAME} ORDER BY key LIMIT 3" - ) - for i, row in enumerate(rows): - print(f"\n {i+1}. Key: {row['key']}") - val = row['value'] - if isinstance(val, dict) and 'page_content' in val: - print(f" 内容: {val['page_content'][:100]}...") - - # Key 前缀分布 - key_prefixes = await conn.fetch( - f""" - SELECT - CASE - WHEN key LIKE '%:%' THEN split_part(key, ':', 1) - ELSE 'no_prefix' - END AS prefix, - COUNT(*) AS cnt - FROM {TABLE_NAME} - GROUP BY prefix - ORDER BY cnt DESC - LIMIT 10 - """ - ) - print(f"\nKey 前缀分布:") - for row in key_prefixes: - print(f" {row['prefix']}: {row['cnt']}") - - finally: - await conn.close() - - -async def test_search(): - """测试检索功能。""" - from rag_indexer.index_builder import IndexBuilder, IndexBuilderConfig - from rag_indexer.splitters import SplitterType - - print("\n" + "=" * 60) - print("检索测试") - print("=" * 60) - - # 使用配置对象初始化(与默认构建方式一致) - config = IndexBuilderConfig( - collection_name=COLLECTION_NAME, - splitter_type=SplitterType.PARENT_CHILD, - ) - builder = IndexBuilder(config) - - # 确保检索器已初始化 - if builder.retriever is None: - print("错误: 检索器未初始化,请检查切分策略") - return - - query = input("\n查询 (回车使用默认): ").strip() or "你好" - print(f"\n查询: {query}") - - # 标准检索(返回父块,因为 ParentDocumentRetriever 默认返回父块) - print("\n--- 标准检索 (返回父块) ---") - results = await builder.retriever.ainvoke(query) - for i, doc in enumerate(results): - content = doc.page_content[:200] if hasattr(doc, 'page_content') else str(doc)[:200] - print(f"\n {i+1}. {content}...") - if hasattr(doc, 'metadata'): - source = doc.metadata.get('source', '') - if source: - print(f" 来源: {source}") - - # 若需要仅返回子块,可以临时修改检索器的 search_type - # (注意:ParentDocumentRetriever 的 search_type 默认为 "similarity") - print("\n--- 检索子块 (通过修改检索器参数) ---") - # 创建一个新的检索器副本,设置为返回子块 - # 简单起见,直接调用 vectorstore 进行相似度搜索获取子块 - vectorstore = builder.vector_store.get_langchain_vectorstore() - sub_results = await vectorstore.asimilarity_search(query, k=3) - for i, doc in enumerate(sub_results): - content = doc.page_content[:200] if hasattr(doc, 'page_content') else str(doc)[:200] - print(f"\n {i+1}. {content}...") - if hasattr(doc, 'metadata'): - parent_id = doc.metadata.get('parent_id', '') - if parent_id: - print(f" 父块 ID: {parent_id}") - - -async def main(): - check_qdrant() - await check_postgres() - await test_search() - - -if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file diff --git a/scripts/start.sh b/scripts/start.sh index 3092c46..bf39a0a 100755 --- a/scripts/start.sh +++ b/scripts/start.sh @@ -290,9 +290,9 @@ start_backend() { source .env 2>/dev/null || true set +a - export PYTHONPATH="$PROJECT_DIR" + export PYTHONPATH="$PROJECT_DIR:$PROJECT_DIR/backend" export BACKEND_PORT=8079 - python app/backend.py & + python backend/app/backend.py & BACKEND_PID=$! echo -e "${GREEN}✓ 后端服务已启动 (PID: $BACKEND_PID)${NC}" sleep 2 @@ -307,7 +307,7 @@ start_frontend() { source .env 2>/dev/null || true set +a - export PYTHONPATH="$PROJECT_DIR" + export PYTHONPATH="$PROJECT_DIR:$PROJECT_DIR/backend" streamlit run frontend/src/frontend_main.py & FRONTEND_PID=$! echo -e "${GREEN}✓ 前端服务已启动 (PID: $FRONTEND_PID)${NC}" diff --git a/backend/app/test_backend.py b/test/test_backend.py similarity index 95% rename from backend/app/test_backend.py rename to test/test_backend.py index 6af60d2..d95a71a 100644 --- a/backend/app/test_backend.py +++ b/test/test_backend.py @@ -6,20 +6,22 @@ import asyncio import os -from .config import DB_URI import sys import uuid from dotenv import load_dotenv -# 添加项目根目录到 Python 路径 (现在文件在 backend/app/ 下,backend 就是根) -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) - +# 添加项目根目录和 backend 目录到 Python 路径 +project_root = os.path.join(os.path.dirname(__file__), "..") +backend_dir = os.path.join(project_root, "backend") +sys.path.insert(0, project_root) +sys.path.insert(0, backend_dir) load_dotenv() +from backend.app.config import DB_URI from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver -from ..agent import AIAgentService -from ..agent.history import ThreadHistoryService -from ..logger import info, warning, error +from backend.app.agent.service import AIAgentService +from backend.app.agent.history import ThreadHistoryService +from backend.app.logger import info, warning, error # PostgreSQL 连接字符串 diff --git a/rag_indexer/test/test_inspect_vectors.py b/test/test_dqrant.py similarity index 85% rename from rag_indexer/test/test_inspect_vectors.py rename to test/test_dqrant.py index 3d671a1..0f8a313 100644 --- a/rag_indexer/test/test_inspect_vectors.py +++ b/test/test_dqrant.py @@ -5,10 +5,15 @@ import sys import numpy as np from dotenv import load_dotenv from qdrant_client import QdrantClient -from backend.rag_core import LlamaCppEmbedder + +# 添加项目根目录和 backend 目录到 Python 路径 +project_root = os.path.join(os.path.dirname(__file__), "..") +backend_dir = os.path.join(project_root, "backend") +sys.path.insert(0, project_root) +sys.path.insert(0, backend_dir) load_dotenv() -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../..")) + +from rag_core import LlamaCppEmbedder QDRANT_URL = os.getenv("QDRANT_URL", "http://127.0.0.1:6333") QDRANT_API_KEY = os.getenv("QDRANT_API_KEY") diff --git a/rag_indexer/test/test_refactored.py b/test/test_rag_indexer_result.py similarity index 92% rename from rag_indexer/test/test_refactored.py rename to test/test_rag_indexer_result.py index 4649bd8..70c7105 100644 --- a/rag_indexer/test/test_refactored.py +++ b/test/test_rag_indexer_result.py @@ -8,10 +8,11 @@ import os import sys # 添加项目根目录到 Python 路径 -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) +project_root = os.path.join(os.path.dirname(__file__), "..") +sys.path.insert(0, project_root) -from ..index_builder import IndexBuilder -from ..splitters import SplitterType +from rag_indexer.index_builder import IndexBuilder +from rag_indexer.splitters import SplitterType async def test_index_builder(): """测试索引构建功能""" @@ -26,7 +27,7 @@ async def test_index_builder(): ) # 测试文档路径 - test_file = os.path.join(os.path.dirname(__file__), "..", "data", "corpus", "三国演义.txt") + test_file = os.path.join(os.path.dirname(__file__), "..", "data", "user_docs", "a.txt") if os.path.exists(test_file): # 构建索引