diff --git a/.env b/.env
index ac0ae32..23ffd0d 100644
--- a/.env
+++ b/.env
@@ -14,30 +14,30 @@ LLAMACPP_API_KEY=token-abc123
 # llama.cpp 服务配置
 # -----------------------------------------------------------------------------
 # 主 LLM 服务 (Gemma-4-E2B GGUF) - 端口 8081
-VLLM_BASE_URL=http://localhost:8081/v1
+VLLM_BASE_URL=http://127.0.0.1:8081/v1
 
 # Embedding 服务 (embeddinggemma-300M GGUF) - 端口 8082
-VLLM_EMBEDDING_URL=http://localhost:8082/v1
+LLAMACPP_EMBEDDING_URL=http://127.0.0.1:8082/v1
 
 # -----------------------------------------------------------------------------
 # Mem0 记忆层配置
 # -----------------------------------------------------------------------------
 # ⭐ 注意：Mem0 现在直接复用主 LLM 实例，无需单独配置
-# Qdrant 向量数据库地址（远程服务器）
+# Qdrant 向量数据库地址（重点：统一使用远程源）
 QDRANT_URL=http://115.190.121.151:6333
 QDRANT_COLLECTION_NAME=mem0_user_memories
 
 # -----------------------------------------------------------------------------
 # 数据库配置
 # -----------------------------------------------------------------------------
-# PostgreSQL 连接字符串（远程服务器）
-DB_URI=postgresql://postgres:mysecretpassword@115.190.121.151:5432/langgraph_db?sslmode=disable
+# PostgreSQL 连接字符串（重点：统一使用远程源）
+DB_URI=postgresql://postgres:huang1998@115.190.121.151:5432/langgraph_db?sslmode=disable
 
 # -----------------------------------------------------------------------------
 # 前端配置
 # -----------------------------------------------------------------------------
 # 后端 API 地址（本地开发使用 8083 端口，避免与 llama.cpp 冲突）
-API_URL=http://localhost:8083/chat
+API_URL=http://127.0.0.1:8083/chat
 
 # -----------------------------------------------------------------------------
 # 应用行为配置
diff --git a/.env.docker b/.env.docker
index fc1f0ed..c0bc275 100644
--- a/.env.docker
+++ b/.env.docker
@@ -32,10 +32,10 @@ ENABLE_GRAPH_TRACE=false
 # llama.cpp 服务配置
 # -----------------------------------------------------------------------------
 # 主 LLM 服务 (Gemma-4-E2B GGUF) - 端口 8081
-VLLM_BASE_URL=http://localhost:8081/v1
+VLLM_BASE_URL=http://host.docker.internal:18000/v1
 
 # Embedding 服务 (embeddinggemma-300M GGUF) - 端口 8082
-VLLM_EMBEDDING_URL=http://localhost:8082/v1
+LLAMACPP_EMBEDDING_URL=http://host.docker.internal:18001/v1
 
 # -----------------------------------------------------------------------------
 # Mem0 记忆层配置
@@ -48,7 +48,7 @@ QDRANT_COLLECTION_NAME=mem0_user_memories
 # 数据库配置
 # -----------------------------------------------------------------------------
 # PostgreSQL 连接字符串（远程服务器上的独立容器）
-DB_URI=postgresql://postgres:mysecretpassword@115.190.121.151:5432/langgraph_db?sslmode=disable
+DB_URI=postgresql://postgres:huang1998@115.190.121.151:5432/langgraph_db?sslmode=disable
 
 # -----------------------------------------------------------------------------
 # 前端配置
diff --git a/.env.example b/.env.example
deleted file mode 100644
index 60d775b..0000000
--- a/.env.example
+++ /dev/null
@@ -1,73 +0,0 @@
-# =============================================================================
-# Agent1 环境配置模板
-# 用法: cp .env.example .env 然后根据实际需求修改
-# =============================================================================
-
-# ⭐ 敏感密钥配置
-# =============================================================================
-# AI 模型 API 密钥
-ZHIPUAI_API_KEY=your_zhipuai_api_key_here
-DEEPSEEK_API_KEY=your_deepseek_api_key_here
-
-# llama.cpp 服务认证 Token（与容器启动参数一致）
-LLAMACPP_API_KEY=token-abc123
-
-# ⭐ 日志调试配置（本地开发可灵活调整）
-# =============================================================================
-# 日志级别：DEBUG, INFO, WARNING, ERROR, CRITICAL
-# 本地开发推荐 DEBUG，生产环境使用 WARNING
-LOG_LEVEL=DEBUG
-
-# 是否启用 DEBUG 模式
-DEBUG=true
-
-# 是否启用 Graph 流转追踪
-ENABLE_GRAPH_TRACE=true
-
-# ⭐ 可选配置（如需覆盖 Dockerfile/docker-compose.yml 中的默认值）
-# =============================================================================
-# 数据库连接（如需使用本地数据库而非远程服务器）
-# DB_URI=postgresql://postgres:mysecretpassword@localhost:5432/langgraph_db?sslmode=disable
-
-# Qdrant 地址（如需使用本地 Qdrant 而非远程服务器）
-# QDRANT_URL=http://localhost:6333
-QDRANT_COLLECTION_NAME=mem0_user_memories
-
-# llama.cpp 服务地址（如端口有变化）
-# VLLM_BASE_URL=http://localhost:8081/v1
-# VLLM_EMBEDDING_URL=http://localhost:8082/v1
-
-# 前端 API 地址（本地开发时需显式配置）
-# 注意：这里只需要域名和端口，不需要 /chat 路径
-API_URL=http://localhost:8083
-```
-
-```
-# ============================================================================
-# AI Agent 项目环境变量配置
-# ⚠️ 重要：此文件包含敏感信息，请勿提交到版本控制系统！
-# ============================================================================
-
-# AI 模型 API Keys（必须配置）
-ZHIPUAI_API_KEY=your_zhipu_api_key_here
-DEEPSEEK_API_KEY=your_deepseek_api_key_here
-LLAMACPP_API_KEY=your_llamacpp_api_key_here
-
-# 日志配置（可选，有默认值）
-LOG_LEVEL=WARNING
-DEBUG=false
-ENABLE_GRAPH_TRACE=false
-
-# 数据库配置（可选，有默认值）
-DB_URI=postgresql://postgres:mysecretpassword@db:5432/langgraph_db?sslmode=disable
-
-# Qdrant 配置（可选，有默认值）
-QDRANT_URL=http://qdrant:6333
-QDRANT_COLLECTION_NAME=mem0_user_memories
-
-# llama.cpp 服务配置（可选，有默认值）
-VLLM_BASE_URL=http://llamacpp:8081/v1
-VLLM_EMBEDDING_URL=http://llamacpp:8082/v1
-
-# 前端 API 地址（可选，有默认值）
-API_URL=http://localhost:8083
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 739e0c6..33ac963 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,5 +1,6 @@
 {
     "editor.fontSize": 24,
     "editor.formatOnSave": true,
-    "files.autoSave": "onWindowChange"
+    "files.autoSave": "onWindowChange",
+    "Codegeex.RepoIndex": true
 }
\ No newline at end of file
diff --git a/FEATURES.md b/FEATURES.md
deleted file mode 100644
index bb858a9..0000000
--- a/FEATURES.md
+++ /dev/null
@@ -1,302 +0,0 @@
-# 🎯 AI Agent 新功能说明
-
-## 新增功能概览
-
-本次更新实现了三大核心功能：**用户登录隔离**、**对话历史管理**、**流式实时响应**。
-
----
-
-## 一、用户登录系统
-
-### 功能特性
-- ✅ **可选登录**：用户可以选择输入用户名或直接使用默认用户
-- ✅ **对话隔离**：不同用户的对话历史完全隔离，避免污染
-- ✅ **默认用户**：未登录时使用 `default_user`，所有未登录用户共享对话
-
-### 使用方式
-1. 启动前端后，左侧栏显示登录界面
-2. 输入用户名（可选），点击"进入"
-3. 如需切换用户，点击"切换用户"按钮
-
-### 技术实现
-- 前端：`st.session_state.user_id` 和 `st.session_state.logged_in` 管理登录状态
-- 后端：所有 API 请求携带 `user_id` 参数，用于数据隔离
-- 数据库：LangGraph checkpoint 的 `metadata` 字段存储 `user_id`
-
----
-
-## 二、对话历史管理
-
-### 功能特性
-- ✅ **历史列表**：左侧栏显示用户的所有对话历史
-- ✅ **摘要展示**：每个历史对话显示摘要（第一条消息或生成的 summary）
-- ✅ **一键加载**：点击历史对话，自动加载完整消息历史
-- ✅ **新对话**：点击"新对话"按钮创建全新对话线程
-- ✅ **实时更新**：每次对话结束后自动刷新历史列表
-
-### 使用方式
-1. 点击"刷新列表"按钮加载历史对话
-2. 点击任意历史对话，自动加载完整消息历史
-3. 点击"新对话"开始全新话题
-
-### 技术实现
-
-#### 后端新增接口
-| 接口 | 方法 | 说明 |
-|------|------|------|
-| `/threads` | GET | 获取用户的对话历史列表 |
-| `/thread/{thread_id}/messages` | GET | 获取指定线程的完整消息历史 |
-| `/thread/{thread_id}/summary` | GET | 获取指定线程的摘要信息 |
-
-#### 新增模块
-- `app/history.py`: `ThreadHistoryService` 类，封装历史查询逻辑
-- 直接查询 LangGraph 的 `checkpoints` 表，通过 `metadata->>'user_id'` 过滤
-
-#### 前端实现
-- 左侧栏显示历史列表，每个对话显示摘要、时间和消息数量
-- 当前选中的对话高亮显示（primary 按钮样式）
-- 点击历史对话调用 `/thread/{thread_id}/messages` 加载完整历史
-
----
-
-## 三、流式实时响应
-
-### 功能特性
-- ✅ **逐字输出**：AI 回复实时逐字显示，提升用户体验
-- ✅ **工具调用状态**：显示工具调用的开始和完成状态
-- ✅ **Token 统计**：对话结束后显示消耗的 token 数量和耗时
-- ✅ **错误处理**：流式响应异常时友好提示用户
-
-### 使用方式
-- 在输入框输入问题后，AI 回复会逐字显示，无需等待完整响应
-- 如果 AI 调用工具，会显示"🔧 调用工具: xxx..."的提示
-- 工具调用完成后显示"✅ 工具 xxx 完成"
-- 回复完成后显示 token 消耗和耗时统计
-
-### 技术实现
-
-#### 后端流式接口
-| 接口 | 方法 | 说明 |
-|------|------|------|
-| `/chat/stream` | POST | 流式对话接口（SSE） |
-
-#### SSE 事件类型
-```json
-{
-  "type": "token",           // AI 逐字输出
-  "content": "你好"
-}
-
-{
-  "type": "tool_start",      // 工具调用开始
-  "tool": "search_calendar"
-}
-
-{
-  "type": "tool_end",        // 工具调用完成
-  "tool": "search_calendar"
-}
-
-{
-  "type": "done",            // 对话完成
-  "reply": "完整回复内容",
-  "token_usage": {"total_tokens": 123},
-  "elapsed_time": 2.5
-}
-
-{
-  "type": "error",           // 错误信息
-  "message": "错误详情"
-}
-```
-
-#### Agent 流式处理
-- `AIAgentService.process_message_stream()` 方法
-- 使用 LangGraph 的 `astream_events()` API 获取流式事件
-- 支持所有模型（zhipu, deepseek, local）
-
-#### 前端流式消费
-- 使用 `requests.post(..., stream=True)` 消费 SSE 流
-- 逐行解析 `data: {...}` 格式的事件
-- 实时更新 UI 显示 token 和工具状态
-
----
-
-## 四、三栏布局设计
-
-### 布局结构
-```
-┌──────────────┬──────────────────────────┬──────────────┐
-│  左侧栏 (1)  │      中间栏 (3)          │  右侧栏 (1)  │
-│              │                          │              │
-│  👤 用户     │  🤖 AI 个人助手          │  📊 会话信息 │
-│  [登录]      │                          │              │
-│              │  [模型选择器]            │  当前对话    │
-│  📚 历史     │  ┌────────────────────┐  │  xxx...      │
-│  [刷新]      │  │                    │  │              │
-│  [新对话]    │  │   聊天消息区域     │  │  消息统计    │
-│              │  │                    │  │  用户: 5     │
-│  💬 对话1    │  └────────────────────┘  │  AI: 4       │
-│  💬 对话2    │                          │              │
-│  💬 对话3    │  [输入框]                │  💡 使用提示 │
-│              │                          │              │
-└──────────────┴──────────────────────────┴──────────────┘
-```
-
-### 各栏功能
-
-#### 左侧栏（宽度 1/5）
-- **用户登录**：输入用户名，切换用户
-- **历史列表**：刷新、点击加载、新对话按钮
-
-#### 中间栏（宽度 3/5）
-- **模型选择**：下拉框选择 AI 模型
-- **聊天区域**：显示消息历史，支持流式输出
-- **输入框**：输入用户问题
-
-#### 右侧栏（宽度 1/5）
-- **会话信息**：显示当前线程 ID
-- **消息统计**：用户消息和 AI 回复数量
-- **使用提示**：功能说明
-
----
-
-## 五、配置说明
-
-### 环境变量
-
-#### 本地开发（.env）
-```bash
-# API 地址（注意：不需要 /chat 后缀）
-API_URL=http://localhost:8083
-
-# 日志调试配置（本地开发推荐 DEBUG）
-LOG_LEVEL=DEBUG
-DEBUG=true
-ENABLE_GRAPH_TRACE=true
-```
-
-#### Docker 部署（.env.docker）
-```bash
-# API 地址（Docker 内部网络）
-API_URL=http://backend:8083
-
-# 日志调试配置（生产环境推荐 WARNING）
-LOG_LEVEL=WARNING
-DEBUG=false
-ENABLE_GRAPH_TRACE=false
-```
-
-### 端口分配
-
-| 服务 | 端口 | 说明 |
-|------|------|------|
-| llama.cpp LLM | 8081 | Gemma-4-E2B GGUF 模型 |
-| llama.cpp Embedding | 8082 | embeddinggemma-300M GGUF 模型 |
-| Backend (FastAPI) | 8083 | AI Agent 后端服务 |
-| Frontend (Streamlit) | 8501 | Web 界面 |
-
----
-
-## 六、文件变更清单
-
-### 新增文件
-| 文件 | 说明 |
-|------|------|
-| `app/history.py` | 历史查询服务 `ThreadHistoryService` |
-
-### 修改文件
-| 文件 | 修改内容 |
-|------|---------|
-| `app/agent.py` | • 添加 `process_message_stream()` 流式处理方法<br>• `process_message()` 写入 `metadata` 支持历史查询 |
-| `app/backend.py` | • 添加 `/threads`、`/thread/{id}/messages`、`/thread/{id}/summary` 接口<br>• 添加 `/chat/stream` 流式接口<br>• 注入 `history_service` |
-| `frontend/frontend.py` | • 完全重写为三栏布局<br>• 实现用户登录和历史管理<br>• 支持流式响应消费 |
-| `.env`, `.env.docker`, `.env.example` | • 移除 `API_URL` 中的 `/chat` 后缀 |
-
----
-
-## 七、使用示例
-
-### 1. 本地开发启动
-```bash
-# 启动后端和前端
-./scripts/start.sh both
-
-# 访问前端
-open http://localhost:8501
-```
-
-### 2. Docker 部署
-```bash
-# 配置环境变量
-cp .env.docker .env
-# 编辑 .env 填入 API Key
-
-# 启动服务
-cd docker
-docker compose up -d
-```
-
-### 3. API 测试
-```bash
-# 获取历史列表
-curl "http://localhost:8083/threads?user_id=test_user"
-
-# 获取线程消息
-curl "http://localhost:8083/thread/{thread_id}/messages?user_id=test_user"
-
-# 流式对话
-curl -X POST "http://localhost:8083/chat/stream" \
-  -H "Content-Type: application/json" \
-  -d '{
-    "message": "你好",
-    "thread_id": "test-thread",
-    "model": "zhipu",
-    "user_id": "test_user"
-  }'
-```
-
----
-
-## 八、注意事项
-
-### 1. 数据库查询性能
-- 当前直接查询 `checkpoints` 表的 JSONB `metadata` 字段
-- 如果用户对话数量很大，建议在 `checkpoints` 表上创建 GIN 索引：
-  ```sql
-  CREATE INDEX idx_checkpoints_metadata_user_id 
-  ON checkpoints USING GIN ((metadata->>'user_id'));
-  ```
-
-### 2. 流式响应缓冲
-- 如果使用 Nginx 反向代理，需要关闭缓冲：
-  ```nginx
-  location /chat/stream {
-      proxy_pass http://backend:8083;
-      proxy_buffering off;
-      proxy_cache off;
-  }
-  ```
-
-### 3. 历史列表分页
-- 当前默认返回 50 条历史记录
-- 如需支持更多历史，可在 `/threads` 接口添加 `offset` 参数实现分页
-
-### 4. 用户认证增强
-- 当前用户登录仅为前端输入，无密码验证
-- 如需加强安全性，可集成 OAuth2 或 JWT 认证
-
----
-
-## 九、下一步优化建议
-
-1. **对话摘要生成**：在 `summarize` 节点中生成对话摘要，存入 checkpoint metadata
-2. **历史记录搜索**：添加关键词搜索功能，快速定位历史对话
-3. **对话导出**：支持导出对话历史为 Markdown 或 JSON 格式
-4. **多设备同步**：同一用户的不同设备共享对话历史
-5. **对话标签**：支持为对话添加标签和分类
-6. **收藏功能**：支持收藏重要对话，方便快速访问
-
----
-
-**🎉 新功能已全部实现并测试通过！**
\ No newline at end of file
diff --git a/LOGGING.md b/LOGGING.md
deleted file mode 100644
index 05ffd9a..0000000
--- a/LOGGING.md
+++ /dev/null
@@ -1,251 +0,0 @@
-# 📝 日志使用规范
-
-## 统一日志系统
-
-本项目采用统一的日志系统，确保后端和前端的日志输出格式一致，便于调试和监控。
-
----
-
-## 📁 日志模块位置
-
-### 后端日志
-- **模块路径**：`app/logger.py`
-- **日志器名称**：`ai_agent`
-- **使用方式**：
-  ```python
-  from app.logger import debug, info, warning, error
-  ```
-
-### 前端日志
-- **模块路径**：`frontend/logger.py`
-- **日志器名称**：`ai_agent_frontend`
-- **使用方式**：
-  ```python
-  from frontend.logger import debug, info, warning, error
-  # 或
-  from .logger import debug, info, warning, error  # 在 frontend 包内
-  ```
-
----
-
-## 🎯 日志级别
-
-| 级别 | 函数 | 使用场景 | 环境变量控制 |
-|------|------|---------|-------------|
-| **DEBUG** | `debug()` | 详细调试信息（变量值、中间状态） | `DEBUG=true` 时输出 |
-| **INFO** | `info()` | 关键流程节点（服务启动、API 请求） | 始终输出 |
-| **WARNING** | `warning()` | 警告信息（配置缺失、降级处理） | 始终输出 |
-| **ERROR** | `error()` | 错误信息（异常、失败） | 始终输出 |
-
----
-
-## 📝 使用示例
-
-### 后端使用（app/ 目录下）
-
-```python
-from app.logger import debug, info, warning, error
-
-async def process_message(self, message: str, ...):
-    info(f"收到用户消息: {message[:50]}...")
-    
-    try:
-        result = await graph.ainvoke(...)
-        debug(f"Graph 执行结果: {result}")
-        return result
-    except Exception as e:
-        error(f"消息处理失败: {e}")
-        raise
-```
-
-### 前端使用（frontend/ 目录下）
-
-```python
-from .logger import error, warning
-
-class APIClient:
-    def get_user_threads(self, user_id: str):
-        try:
-            resp = requests.get(...)
-            if resp.status_code != 200:
-                error(f"获取历史列表失败: HTTP {resp.status_code}")
-                return []
-        except Exception as e:
-            error(f"获取历史列表异常: {e}")
-            return []
-```
-
----
-
-## ⚙️ 配置说明
-
-### 环境变量
-
-| 变量 | 说明 | 默认值 | 示例 |
-|------|------|--------|------|
-| `LOG_LEVEL` | 日志级别 | `INFO` | `DEBUG`, `INFO`, `WARNING`, `ERROR` |
-| `DEBUG` | 调试模式 | `false` | `true`, `false` |
-
-### 本地开发配置（.env）
-
-```bash
-# 输出详细调试信息
-LOG_LEVEL=DEBUG
-DEBUG=true
-```
-
-### Docker 部署配置（.env.docker）
-
-```bash
-# 仅输出关键信息，减少日志量
-LOG_LEVEL=WARNING
-DEBUG=false
-```
-
----
-
-## 🚫 禁止事项
-
-### ❌ 不要使用 `print()`
-
-```python
-# ❌ 错误
-print("处理消息...")
-print(f"错误: {e}")
-
-# ✅ 正确
-info("处理消息...")
-error(f"错误: {e}")
-```
-
-### ❌ 不要使用 `loguru`
-
-```python
-# ❌ 错误
-from loguru import logger
-logger.info("消息")
-
-# ✅ 正确
-from app.logger import info  # 后端
-from frontend.logger import info  # 前端
-info("消息")
-```
-
-### ❌ 不要在工具函数中使用日志
-
-工具函数应保持纯粹，避免副作用：
-
-```python
-# ❌ 错误
-@tool
-def read_file(filename: str):
-    info(f"读取文件: {filename}")  # 工具函数不应有日志
-    return content
-
-# ✅ 正确（日志在调用工具的地方）
-async def tool_call_node(state):
-    info(f"调用工具: read_file")
-    result = await read_file.ainvoke(...)
-    return result
-```
-
----
-
-## 📊 日志格式
-
-### 输出格式
-
-```
-2026-04-16 10:30:45 | INFO     | ai_agent | 收到用户消息: 你好...
-2026-04-16 10:30:45 | DEBUG    | ai_agent | Graph 执行结果: {...}
-2026-04-16 10:30:46 | WARNING  | ai_agent_frontend | JSON 解析失败: ...
-2026-04-16 10:30:46 | ERROR    | ai_agent | 消息处理失败: ConnectionError
-```
-
-### 字段说明
-
-| 字段 | 说明 |
-|------|------|
-| 时间 | `YYYY-MM-DD HH:MM:SS` |
-| 级别 | `DEBUG`, `INFO`, `WARNING`, `ERROR`（8 字符宽度，左对齐） |
-| 日志器 | `ai_agent`（后端）或 `ai_agent_frontend`（前端） |
-| 消息 | 日志内容 |
-
----
-
-## 🔧 最佳实践
-
-### 1. 使用结构化日志
-
-```python
-# ✅ 推荐：包含关键信息
-info(f"用户 {user_id} 调用模型 {model_name}")
-
-# ❌ 不推荐：信息不完整
-info("调用模型")
-```
-
-### 2. 异常日志包含堆栈
-
-```python
-# ✅ 推荐：记录完整异常信息
-try:
-    result = await api_call()
-except Exception as e:
-    error(f"API 调用失败: {e}", exc_info=True)
-```
-
-### 3. 敏感信息脱敏
-
-```python
-# ✅ 推荐：隐藏敏感信息
-debug(f"API Key: {api_key[:4]}...{api_key[-4:]}")
-
-# ❌ 错误：泄露完整密钥
-debug(f"API Key: {api_key}")
-```
-
-### 4. 日志级别合理使用
-
-```python
-# ✅ 推荐：根据重要性选择级别
-info("服务启动成功")           # 关键流程
-debug(f"配置参数: {config}")   # 调试信息
-warning("配置缺失，使用默认值")  # 警告但不影响运行
-error("数据库连接失败")         # 严重错误
-```
-
----
-
-## 📋 文件清单
-
-| 文件 | 日志导入 | 说明 |
-|------|---------|------|
-| `app/agent.py` | `from app.logger import debug, info, warning, error` | ✅ 正确 |
-| `app/backend.py` | `from app.logger import debug, info, warning, error` | ✅ 正确 |
-| `app/history.py` | `from app.logger import error` | ✅ 已修复 |
-| `app/nodes/*.py` | `from app.logger import ...` | ✅ 正确 |
-| `app/tools.py` | 无日志 | ✅ 正确（工具函数不使用日志） |
-| `frontend/api_client.py` | `from .logger import error, warning` | ✅ 已修复 |
-| `frontend/logger.py` | 自身定义 | ✅ 前端日志模块 |
-
----
-
-## 🎯 总结
-
-### 核心原则
-1. **统一模块**：后端使用 `app.logger`，前端使用 `frontend.logger`
-2. **禁止 print**：所有输出必须通过日志模块
-3. **禁止 loguru**：不使用第三方日志库
-4. **环境控制**：通过 `LOG_LEVEL` 和 `DEBUG` 控制输出
-5. **工具纯粹**：工具函数不使用日志，日志在调用方
-
-### 优势
-- ✅ 格式统一：所有日志输出格式一致
-- ✅ 易于调试：支持分级输出，开发时查看详细信息
-- ✅ 性能优化：生产环境可减少日志量
-- ✅ 便于监控：日志格式标准化，便于日志收集和分析
-
----
-
-**📝 所有文件已按照日志规范统一！**
\ No newline at end of file
diff --git a/QUICKSTART.md b/QUICKSTART.md
index aac929a..2f5f92b 100644
--- a/QUICKSTART.md
+++ b/QUICKSTART.md
@@ -25,8 +25,8 @@ vim .env  # 或使用你喜欢的编辑器
 - `LLAMACPP_API_KEY` - llama.cpp 服务认证 Token（与容器启动参数一致，默认 `token-abc123`）
 
 **可选配置项**：
-- `VLLM_BASE_URL` - LLM 服务地址（默认已配置为 `http://localhost:8081/v1`）
-- `VLLM_EMBEDDING_URL` - Embedding 服务地址（默认已配置为 `http://localhost:8082/v1`）
+- `VLLM_BASE_URL` - LLM 服务地址（本地默认：`http://127.0.0.1:8081/v1`，Docker容器访问宿主机：`http://host.docker.internal:18000/v1`）
+- `LLAMACPP_EMBEDDING_URL` - Embedding 服务地址（本地默认：`http://127.0.0.1:8082/v1`，Docker容器访问宿主机：`http://host.docker.internal:18001/v1`）
 - `DB_URI` - PostgreSQL 连接字符串（默认已配置，使用远程服务器地址）
 - `QDRANT_URL` - Qdrant 向量数据库地址（默认已配置，使用远程服务器地址）
 
@@ -44,8 +44,8 @@ docker compose -f docker/docker-compose.yml up -d --build
 - 访问地址：`http://your-domain.com` 或 `http://your-server-ip`
 
 **如果未配置 Nginx（直接访问容器）**：
-- **前端**: http://localhost:8501
-- **后端 API**: http://localhost:8001
+- **前端**: http://127.0.0.1:8501
+- **后端 API**: http://127.0.0.1:8001
 
 #### 常用命令
 
@@ -95,16 +95,19 @@ vim .env
 ZHIPUAI_API_KEY=your_api_key_here
 LLAMACPP_API_KEY=token-abc123
 
-# 本地开发时，llama.cpp 服务在 localhost
-VLLM_BASE_URL=http://localhost:8081/v1
-VLLM_EMBEDDING_URL=http://localhost:8082/v1
+# 本地开发时，llama.cpp 服务在 127.0.0.1
+VLLM_BASE_URL=http://127.0.0.1:8081/v1  # 本地开发
+LLAMACPP_EMBEDDING_URL=http://127.0.0.1:8082/v1  # 本地开发
+# 或（Docker 容器访问宿主机时，用下面两行替换上面两行）
+# VLLM_BASE_URL=http://host.docker.internal:18000/v1
+# LLAMACPP_EMBEDDING_URL=http://host.docker.internal:18001/v1
 
 # 数据库和向量存储使用远程服务器
-DB_URI=postgresql://postgres:mysecretpassword@115.190.121.151:5432/langgraph_db?sslmode=disable
+DB_URI=postgresql://postgres:<your_db_password>@115.190.121.151:5432/langgraph_db?sslmode=disable
 QDRANT_URL=http://115.190.121.151:6333
 
-# 本地开发时，后端也在 localhost
-API_URL=http://localhost:8083/chat
+# 本地开发时，后端也在 127.0.0.1
+API_URL=http://127.0.0.1:8083/chat
 ```
 
 #### 3. 启动服务
@@ -119,7 +122,7 @@ python app/backend.py
 cd frontend && streamlit run app.py
 ```
 
-浏览器自动打开前端页面（如果配置了 Nginx，访问 `http://your-domain.com`；否则访问 http://localhost:8501）
+浏览器自动打开前端页面（如果配置了 Nginx，访问 `http://your-domain.com`；否则访问 http://127.0.0.1:8501）
 
 ---
 
@@ -223,7 +226,7 @@ docker compose exec backend bash
 docker compose logs -f backend
 
 # 测试后端 API
-curl http://localhost:8001/health
+curl http://127.0.0.1:8001/health
 ```
 
 ---
@@ -249,7 +252,7 @@ curl http://localhost:8001/health
 
 在 Gitea 仓库设置中添加：
 - `ZHIPUAI_API_KEY`
-- `VLLM_LOCAL_KEY`
+- `LLAMACPP_API_KEY`
 
 ---
 
diff --git a/README.md b/README.md
index f433e06..e654505 100644
--- a/README.md
+++ b/README.md
@@ -134,8 +134,8 @@ docker compose -f docker/docker-compose.yml up -d --build
 # 3. 访问应用
 # 如果配置了 Nginx 反向代理：http://your-domain.com 或 http://your-server-ip
 # 如果未配置 Nginx（直接访问）：
-#   - 前端: http://localhost:8501
-#   - 后端 API: http://localhost:8001
+#   - 前端: http://127.0.0.1:8501
+#   - 后端 API: http://127.0.0.1:8001
 ```
 
 ### 方式二：本地开发模式
@@ -255,21 +255,26 @@ model_configs = {
 
 **使用方法：**
 
-- **本地开发**：`cp .env.example .env`，修改为 localhost 相关地址
+- **本地开发**：直接编辑仓库中的 `.env`（`.env.example` 模板已移除），使用 127.0.0.1 相关地址
 - **Docker 部署**：`cp .env.docker .env`，使用远程服务器地址
 
 ### 必需的环境变量
 
 | 变量名 | 说明 | 本地开发示例 | Docker 部署示例 |
 |--------|------|------------|----------------|
-| `ZHIPUAI_API_KEY` | 智谱 AI API 密钥 | `your_key_here` | `your_key_here` |
-| `DEEPSEEK_API_KEY` | DeepSeek API 密钥 | `your_key_here` | `your_key_here` |
-| `LLAMACPP_API_KEY` | llama.cpp 认证 Token | `token-abc123` | `token-abc123` |
-| `VLLM_BASE_URL` | LLM 服务地址 | `http://localhost:8081/v1` | `http://localhost:8081/v1` |
-| `VLLM_EMBEDDING_URL` | Embedding 服务地址 | `http://localhost:8082/v1` | `http://localhost:8082/v1` |
-| `QDRANT_URL` | Qdrant 地址 | `http://115.190.121.151:6333` | `http://115.190.121.151:6333` |
-| `DB_URI` | PostgreSQL 连接字符串 | `postgresql://...@115.190.121.151:5432/...` | `postgresql://...@115.190.121.151:5432/...` |
-| `API_URL` | 后端 API 地址 | `http://localhost:8083/chat` | （由 docker-compose.yml 注入） |
+| `QDRANT_URL` | Qdrant 向量数据库地址 | `http://127.0.0.1:6333` | `http://your-server:6333` |
+| `QDRANT_COLLECTION_NAME` | Qdrant 集合名称 | `mem0_user_memories` | `your_collection_name` |
+| `VLLM_BASE_URL` | LLM 服务地址（本地） | `http://127.0.0.1:8081/v1` | `http://your-server:8081/v1` |
+| `LLAMACPP_EMBEDDING_URL` | Embedding 服务地址（本地） | `http://127.0.0.1:8082/v1` | `http://your-server:8082/v1` |
+| `VLLM_BASE_URL` | LLM 服务地址（Docker） | `http://host.docker.internal:18000/v1` | `http://your-server:18000/v1` |
+| `LLAMACPP_EMBEDDING_URL` | Embedding 服务地址（Docker） | `http://host.docker.internal:18001/v1` | `http://your-server:18001/v1` |
+| `LLAMACPP_API_KEY` | llama.cpp API 密钥 | `your-llamacpp-api-key` | `your-real-api-key` |
+| `ZHIPUAI_API_KEY` | 智谱AI API密钥 | `your-zhipuai-api-key` | `your-real-api-key` |
+| `DEEPSEEK_API_KEY` | DeepSeek API密钥 | `your-deepseek-api-key` | `your-real-api-key` |
+| `DB_URI` | PostgreSQL 连接字符串 | `postgresql://...@115.190.121.151:5432/...` | `postgresql://...@115.190.121.151:5432/...` |
+| `LOG_LEVEL` | 日志级别 | `INFO` | `DEBUG`/`INFO`/`WARNING`/`ERROR` |
+| `ENABLE_GRAPH_TRACE` | 是否启用图流转追踪 | `true` | `true`/`false` |
+| `MEMORY_SUMMARIZE_INTERVAL` | 对话摘要生成间隔 | `10` | `5`/`10`/`15` |
 
 ### 配置示例
 
@@ -278,11 +283,13 @@ model_configs = {
 ZHIPUAI_API_KEY=your_api_key_here
 DEEPSEEK_API_KEY=your_deepseek_api_key_here
 LLAMACPP_API_KEY=token-abc123
-VLLM_BASE_URL=http://localhost:8081/v1
-VLLM_EMBEDDING_URL=http://localhost:8082/v1
+VLLM_BASE_URL=http://127.0.0.1:8081/v1
+LLAMACPP_EMBEDDING_URL=http://127.0.0.1:8082/v1  # 本地开发
+# 或（Docker 容器访问宿主机时改用下一行）
+# LLAMACPP_EMBEDDING_URL=http://host.docker.internal:18001/v1
 QDRANT_URL=http://115.190.121.151:6333
-DB_URI=postgresql://postgres:mysecretpassword@115.190.121.151:5432/langgraph_db?sslmode=disable
-API_URL=http://localhost:8083/chat
+DB_URI=postgresql://postgres:<your_db_password>@115.190.121.151:5432/langgraph_db?sslmode=disable
+API_URL=http://127.0.0.1:8083/chat
 ```
 
 #### Docker 部署 (.env.docker)
@@ -290,10 +297,12 @@ API_URL=http://localhost:8083/chat
 ZHIPUAI_API_KEY=your_api_key_here
 DEEPSEEK_API_KEY=your_deepseek_api_key_here
 LLAMACPP_API_KEY=token-abc123
-VLLM_BASE_URL=http://localhost:8081/v1
-VLLM_EMBEDDING_URL=http://localhost:8082/v1
+VLLM_BASE_URL=http://host.docker.internal:18000/v1  # Docker容器访问宿主机
+LLAMACPP_EMBEDDING_URL=http://host.docker.internal:18001/v1  # Docker容器访问宿主机
+# 本地直接运行（非 Docker）时改用：
+# VLLM_BASE_URL=http://127.0.0.1:8081/v1 与 LLAMACPP_EMBEDDING_URL=http://127.0.0.1:8082/v1
 QDRANT_URL=http://115.190.121.151:6333
-DB_URI=postgresql://postgres:mysecretpassword@115.190.121.151:5432/langgraph_db?sslmode=disable
+DB_URI=postgresql://postgres:<your_db_password>@115.190.121.151:5432/langgraph_db?sslmode=disable
 # API_URL 在 docker-compose.yml 中配置为 http://backend:8083/chat
 ```
 
diff --git a/REMOTE_SERVICES_MIGRATION.md b/REMOTE_SERVICES_MIGRATION.md
deleted file mode 100644
index 922c094..0000000
--- a/REMOTE_SERVICES_MIGRATION.md
+++ /dev/null
@@ -1,180 +0,0 @@
-# 远程服务配置迁移指南
-
-## 📋 变更概述
-
-从 **2026-04-15** 起，项目已将 PostgreSQL 和 Qdrant 服务迁移到远程服务器（`115.190.121.151`），本地开发环境不再需要运行这些服务的容器。
-
-## 🌐 远程服务地址
-
-| 服务 | 远程地址 | 端口 | 说明 |
-|------|---------|------|------|
-| **PostgreSQL** | `115.190.121.151` | `5432` | LangGraph 状态持久化 |
-| **Qdrant** | `115.190.121.151` | `6333` | Mem0 向量数据库 |
-
-## 🔧 已修改的配置文件
-
-### 1. `.env` - 本地开发配置
-```bash
-# 之前（本地容器）
-QDRANT_URL=http://localhost:6333
-DB_URI=postgresql://postgres:mysecretpassword@localhost:5432/langgraph_db?sslmode=disable
-
-# 现在（远程服务器）
-QDRANT_URL=http://115.190.121.151:6333
-DB_URI=postgresql://postgres:mysecretpassword@115.190.121.151:5432/langgraph_db?sslmode=disable
-```
-
-### 2. `.env.docker` - Docker Compose 配置
-```bash
-# 之前（Docker 内部网络）
-QDRANT_URL=http://qdrant:6333
-DB_URI=postgresql://postgres:mysecretpassword@postgres:5432/langgraph_db?sslmode=disable
-
-# 现在（远程服务器）
-QDRANT_URL=http://115.190.121.151:6333
-DB_URI=postgresql://postgres:mysecretpassword@115.190.121.151:5432/langgraph_db?sslmode=disable
-```
-
-### 3. `docker/docker-compose.yml` - Docker Compose 编排
-```yaml
-# ❌ 已移除的服务
-# postgres:
-#   image: postgres:16
-#   ...
-
-# qdrant:
-#   image: qdrant/qdrant:latest
-#   ...
-
-# ✅ backend 服务配置更新
-backend:
-  environment:
-    - DB_URI=postgresql://postgres:mysecretpassword@115.190.121.151:5432/langgraph_db?sslmode=disable
-    - QDRANT_URL=http://115.190.121.151:6333
-  # ⭐ 移除了 depends_on (postgres, qdrant)
-```
-
-## 🚀 使用方式
-
-### 本地开发（直接运行 Python）
-```bash
-# 1. 确保 .env 文件已更新（已完成）
-cat .env | grep -E "(QDRANT_URL|DB_URI)"
-
-# 2. 启动后端服务
-python app/backend.py
-
-# 3. 启动前端服务
-cd frontend && streamlit run app.py
-```
-
-### Docker Compose 部署
-```bash
-# 1. 确保 .env.docker 文件已更新（已完成）
-cp .env.docker .env
-
-# 2. 启动服务（仅 backend 和 frontend）
-cd docker
-docker compose up -d
-
-# 3. 查看日志
-docker compose logs -f backend
-```
-
-## ⚠️ 注意事项
-
-### 1. 网络连接
-- 确保本地机器可以访问 `115.190.121.151` 的 `5432` 和 `6333` 端口
-- 测试连接：
-  ```bash
-  # 测试 PostgreSQL
-  psql -h 115.190.121.151 -U postgres -d langgraph_db
-  
-  # 测试 Qdrant
-  curl http://115.190.121.151:6333/collections
-  ```
-
-### 2. 防火墙配置
-如果无法连接，检查远程服务器的防火墙规则：
-```bash
-# 在远程服务器上执行
-sudo ufw allow 5432/tcp
-sudo ufw allow 6333/tcp
-sudo ufw reload
-```
-
-### 3. 数据持久化
-- PostgreSQL 数据存储在远程服务器的 `~/docker_volumes/postgres_data`
-- Qdrant 数据存储在远程服务器的 `~/docker_volumes/qdrant_storage`
-- **无需在本地维护数据卷**
-
-### 4. 备份与恢复
-如需备份远程数据库：
-```bash
-# 备份 PostgreSQL
-pg_dump -h 115.190.121.151 -U postgres langgraph_db > backup_$(date +%Y%m%d).sql
-
-# 备份 Qdrant（通过 API 导出集合）
-curl http://115.190.121.151:6333/collections/mem0_user_memories/snapshot > snapshot.zip
-```
-
-## 🔄 回滚到本地容器（可选）
-
-如果需要使用本地容器进行测试，可以：
-
-1. **修改 `.env` 文件**：
-   ```bash
-   QDRANT_URL=http://localhost:6333
-   DB_URI=postgresql://postgres:mysecretpassword@localhost:5432/langgraph_db?sslmode=disable
-   ```
-
-2. **启动本地容器**：
-   ```bash
-   docker run -d --name qdrant_server -p 6333:6333 qdrant/qdrant
-   docker run -d --name ai-postgres -e POSTGRES_PASSWORD=mysecretpassword -e POSTGRES_DB=langgraph_db -p 5432:5432 postgres:16
-   ```
-
-3. **初始化数据库表**：
-   ```bash
-   python scripts/init_db.py
-   ```
-
-## 📊 架构对比
-
-### 之前（本地容器）
-```
-┌─────────────┐     ┌──────────┐     ┌──────────┐
-│  Frontend   │────▶│ Backend  │────▶│ Postgres │ (localhost:5432)
-│  :8501      │     │  :8001   │     └──────────┘
-└─────────────┘     └──────────┘     ┌──────────┐
-                                      │  Qdrant  │ (localhost:6333)
-                                      └──────────┘
-```
-
-### 现在（远程服务）
-```
-┌─────────────┐     ┌──────────┐     ┌──────────────────┐
-│  Frontend   │────▶│ Backend  │────▶│ Remote Services  │
-│  :8501      │     │  :8001   │     │                  │
-└─────────────┘     └──────────┘     │ • Postgres       │
-                                     │   (115.190.121.151:5432)
-                                     │ • Qdrant         │
-                                     │   (115.190.121.151:6333)
-                                     └──────────────────┘
-```
-
-## ✅ 验证清单
-
-- [x] `.env` 文件已更新为远程地址
-- [x] `.env.docker` 文件已更新为远程地址
-- [x] `.env.example` 模板已更新
-- [x] `docker-compose.yml` 已移除 postgres 和 qdrant 服务
-- [x] 远程服务器上的服务正常运行
-- [ ] 本地可以连接到远程 PostgreSQL
-- [ ] 本地可以连接到远程 Qdrant
-- [ ] 应用功能测试通过
-
----
-
-**最后更新**: 2026-04-15  
-**维护者**: AI Agent Team
diff --git a/app/agent.py b/app/agent.py
index a000a8f..53d5715 100644
--- a/app/agent.py
+++ b/app/agent.py
@@ -4,6 +4,7 @@ AI Agent 服务类 - 支持多模型动态切换
 """
 
 import os
+import json
 from dotenv import load_dotenv
 from langchain_community.chat_models import ChatZhipuAI
 from langchain_openai import ChatOpenAI
@@ -41,8 +42,9 @@ class AIAgentService:
             api_key=api_key,
             temperature=0.1,
             max_tokens=4096,
-            timeout=60.0,          # 请求超时时间（秒）
-            max_retries=2,          # 失败后自动重试次数
+            timeout=120.0,         # 增加请求超时时间（秒），原为60秒
+            max_retries=3,          # 增加重试次数，原为2次
+            streaming=True,         # 确保开启流式输出
         )
 
     def _create_deepseek_llm(self):
@@ -58,6 +60,7 @@ class AIAgentService:
             max_tokens=4096,
             timeout=60.0,          # 请求超时时间（秒）
             max_retries=2,          # 失败后自动重试次数
+            streaming=True,         # 确保开启流式输出
         )
 
     def _create_local_llm(self):
@@ -65,7 +68,7 @@ class AIAgentService:
         # vLLM 服务地址：优先从环境变量读取，适配 Docker、FRP 穿透和本地开发
         vllm_base_url = os.getenv(
             "VLLM_BASE_URL",
-            "http://localhost:8081/v1"
+            "http://127.0.0.1:8081/v1"
         )
         
         return ChatOpenAI(
@@ -74,14 +77,15 @@ class AIAgentService:
             model="gemma-4-E2B-it",
             timeout=60.0,          # 请求超时时间（秒）
             max_retries=2,          # 失败后自动重试次数
+            streaming=True,         # 确保开启流式输出
         )
 
     async def initialize(self):
         """预编译所有模型的 graph（使用传入的 checkpointer）"""
         model_configs = {
-            "zhipu": self._create_zhipu_llm,
-            "deepseek": self._create_deepseek_llm,
-            "local": self._create_local_llm,
+            "local": self._create_local_llm,      # 本地模型作为第一个
+            "deepseek": self._create_deepseek_llm, # DeepSeek 作为中间
+            "zhipu": self._create_zhipu_llm,      # GLM-4.7 作为最后一个
         }
 
         for model_name, llm_creator in model_configs.items():
@@ -107,7 +111,7 @@ class AIAgentService:
 
         return self
 
-    async def process_message(self, message: str, thread_id: str, model: str = "zhipu", user_id: str = "default_user") -> dict:
+    async def process_message(self, message: str, thread_id: str, model: str = "local", user_id: str = "default_user") -> dict:
         """
         处理用户消息，返回包含回复、token统计和耗时的字典
         
@@ -156,6 +160,28 @@ class AIAgentService:
             "elapsed_time": elapsed_time
         }
     
+    def _serialize_value(self, value):
+        """Recursively convert LangChain objects into a JSON-serializable form."""
+        if hasattr(value, 'content'):
+            # Anything exposing a `.content` attribute is treated as a LangChain message object
+            msg_type = getattr(value, 'type', 'message')
+            return {
+                "role": msg_type,
+                "content": getattr(value, 'content', ''),
+                "additional_kwargs": getattr(value, 'additional_kwargs', {}),
+                "tool_calls": getattr(value, 'tool_calls', [])
+            }
+        elif isinstance(value, dict):
+            return {k: self._serialize_value(v) for k, v in value.items()}
+        elif isinstance(value, (list, tuple)):
+            return [self._serialize_value(item) for item in value]
+        else:
+            try:
+                json.dumps(value)
+                return value
+            except (TypeError, ValueError):
+                return str(value)
+
     async def process_message_stream(self, message: str, thread_id: str, model_name: str, user_id: str = "default_user"):
         """
         流式处理消息，返回异步生成器
@@ -170,10 +196,9 @@ class AIAgentService:
             字典，包含事件类型和数据
         """
         graph = self.graphs.get(model_name)
+        
         if not graph:
-            warning(f"警告: 模型 '{model_name}' 不可用，使用默认模型")
-            model_name = next(iter(self.graphs.keys()))
-            graph = self.graphs[model_name]
+            raise ValueError(f"模型 '{model_name}' 未找到或未初始化")
         
         config = {
             "configurable": {"thread_id": thread_id},
@@ -182,36 +207,71 @@ class AIAgentService:
         input_state = {"messages": [{"role": "user", "content": message}]}
         context = GraphContext(user_id=user_id)
         
-        # 使用 astream_events 获取流式事件
-        async for event in graph.astream_events(input_state, config=config, context=context, version="v2"):
-            kind = event["event"]
-            
-            # 聊天模型流式输出
-            if kind == "on_chat_model_stream":
-                content = event["data"]["chunk"].content
-                if content:
-                    yield {"type": "token", "content": content}
-            
-            # 工具调用开始
-            elif kind == "on_tool_start":
-                tool_name = event["name"]
-                yield {"type": "tool_start", "tool": tool_name}
-            
-            # 工具调用结束
-            elif kind == "on_tool_end":
-                tool_name = event["name"]
-                yield {"type": "tool_end", "tool": tool_name}
-            
-            # 链结束，获取最终结果
-            elif kind == "on_chain_end" and event["name"] == "LangGraph":
-                output = event["data"]["output"]
-                reply = output["messages"][-1].content if output.get("messages") else ""
-                token_usage = output.get("last_token_usage", {})
-                elapsed_time = output.get("last_elapsed_time", 0.0)
+        async for chunk in graph.astream(
+            input_state,
+            config=config,
+            context=context,
+            stream_mode=["messages", "updates", "custom"],  # 组合多种模式，添加 custom
+            version="v2",                         # 使用统一的v2格式
+            subgraphs=True                        # 如果你使用了子图，请开启此项
+        ):
+            chunk_type = chunk["type"]
+            processed_event = {}
+
+            # 1. 处理 LLM Token 流 (实现打字机效果)
+            if chunk_type == "messages":
+                message_chunk, metadata = chunk["data"]
                 
-                yield {
-                    "type": "done",
-                    "reply": reply,
-                    "token_usage": token_usage,
-                    "elapsed_time": elapsed_time
+                # 提取元数据
+                node_name = metadata.get("langgraph_node", "unknown")
+                # Read .content defensively; if message_chunk has no content attribute, fall back to str(message_chunk)
+                token_content = getattr(message_chunk, 'content', str(message_chunk))
+                
+                # 提取 DeepSeek reasoner 的思考过程 token
+                reasoning_token = ""
+                if hasattr(message_chunk, 'additional_kwargs'):
+                    reasoning_token = message_chunk.additional_kwargs.get("reasoning_content", "")
+                
+                # [DEBUG] 临时添加：只在 reasoning_token 不为空时打印，方便你直观地看到它
+                if reasoning_token:
+                    import logging
+                    logging.debug(f"💡 [Reasoning Token 捕获]: {repr(reasoning_token)}")
+                
+                processed_event = {
+                    "type": "llm_token",
+                    "node": node_name,
+                    "token": token_content,
+                    "reasoning_token": reasoning_token,
+                    "metadata": metadata  # 可选的元数据
                 }
+            
+            # 2. 处理状态更新 (节点执行完成)
+            elif chunk_type == "updates":
+                updates_data = chunk["data"]
+                # 序列化 updates 中的所有数据
+                serialized_data = self._serialize_value(updates_data)
+                processed_event = {
+                    "type": "state_update",
+                    "data": serialized_data
+                }
+                # 为了兼容前端旧字段，也保留 messages 字段（可选）
+                if "messages" in serialized_data:
+                    processed_event["messages"] = serialized_data["messages"]
+            
+            # 3. 处理自定义数据 (如果需要)
+            elif chunk_type == "custom":
+                # 自定义事件同样需要序列化
+                serialized_data = self._serialize_value(chunk["data"])
+                processed_event = {
+                    "type": "custom",
+                    "data": serialized_data
+                }
+
+            # 4. 其他类型（debug, tasks等）按需处理
+            else:
+                # 对于不需要的类型，直接跳过
+                continue
+
+            # 确保事件有数据再发送
+            if processed_event:
+                yield processed_event
\ No newline at end of file
diff --git a/app/backend.py b/app/backend.py
index 5cb1ef2..f531a36 100644
--- a/app/backend.py
+++ b/app/backend.py
@@ -25,7 +25,7 @@ load_dotenv()
 # 优先级：环境变量 DB_URI > Docker 内部服务名 > 本地开发地址
 DB_URI = os.getenv(
     "DB_URI",
-    "postgresql://postgres:mysecretpassword@ai-postgres:5432/langgraph_db?sslmode=disable"
+    "postgresql://postgres:huang1998@ai-postgres:5432/langgraph_db?sslmode=disable"
 )
 
 
diff --git a/app/config.py b/app/config.py
index 1c72e8e..d35e467 100644
--- a/app/config.py
+++ b/app/config.py
@@ -16,8 +16,9 @@ MEMORY_SUMMARIZE_INTERVAL = int(os.getenv("MEMORY_SUMMARIZE_INTERVAL", "10"))
 
 # ========== Mem0 记忆层配置 ==========
 # Qdrant 向量数据库地址
-QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
+QDRANT_URL = os.getenv("QDRANT_URL", "http://127.0.0.1:6333")
 QDRANT_COLLECTION_NAME = os.getenv("QDRANT_COLLECTION_NAME", "mem0_user_memories")
 
-# vLLM Embedding 服务地址 (用于 Mem0 的向量化)
-VLLM_EMBEDDING_URL = os.getenv("VLLM_EMBEDDING_URL", "http://localhost:8082/v1")
+# llama.cpp Embedding 服务地址 (用于 Mem0 的向量化)
+LLAMACPP_EMBEDDING_URL = os.getenv("LLAMACPP_EMBEDDING_URL", "http://127.0.0.1:8082/v1")
+LLAMACPP_API_KEY = os.getenv("LLAMACPP_API_KEY", "your-llamacpp-api-key")
\ No newline at end of file
diff --git a/app/graph_builder.py b/app/graph_builder.py
index cc72d84..2af230a 100644
--- a/app/graph_builder.py
+++ b/app/graph_builder.py
@@ -16,6 +16,7 @@ from app.nodes import (
     should_continue
 )
 from app.memory import Mem0Client
+from app.nodes.finalize import finalize_node
 
 
 class GraphBuilder:
@@ -57,6 +58,7 @@ class GraphBuilder:
         builder.add_node("llm_call", llm_call_node)
         builder.add_node("tool_node", tool_call_node)
         builder.add_node("summarize", summarize_node)
+        builder.add_node("finalize", finalize_node)
 
         # 添加边
         builder.add_edge(START, "retrieve_memory")
@@ -67,10 +69,11 @@ class GraphBuilder:
             {
                 "tool_node": "tool_node",
                 "summarize": "summarize",
-                'END': END
+                "finalize": "finalize"
             }
         )
         builder.add_edge("tool_node", "llm_call")
-        builder.add_edge("summarize", END)
+        builder.add_edge("summarize", "finalize")
+        builder.add_edge("finalize", END)
         
-        return builder
+        return builder
\ No newline at end of file
diff --git a/app/history.py b/app/history.py
index 55b1032..d814aad 100644
--- a/app/history.py
+++ b/app/history.py
@@ -28,11 +28,14 @@ class ThreadHistoryService:
         try:
             # 查询 checkpoints 表获取用户的线程列表
             async with self.checkpointer.conn.cursor() as cur:
-                # 查询每个线程的最新 checkpoint 和创建时间
+                # 在较新的 LangGraph 版本中，AsyncPostgresSaver 创建的 checkpoints 表
+                # 没有 created_at 列，而是使用 checkpoint_id 作为时间排序依据。
+                # 我们可以直接按 thread_id 去重，并用 checkpoint_id 排序。
+                # 另外，用户的 metadata 存储在 metadata JSONB 列中。
                 query = """
                     SELECT 
                         thread_id,
-                        MAX(created_at) as last_updated
+                        MAX(checkpoint_id) as last_updated
                     FROM checkpoints
                     WHERE metadata->>'user_id' = %s
                     GROUP BY thread_id
@@ -49,17 +52,20 @@ class ThreadHistoryService:
                     # 获取该线程的状态
                     state = await self.checkpointer.aget_tuple({"configurable": {"thread_id": thread_id}})
                     
-                    if state and state.values:
-                        messages = state.values.get("messages", [])
-                        summary = self._extract_summary(messages)
-                        message_count = len([m for m in messages if hasattr(m, 'type') and m.type in ["human", "ai"]])
+                    if state and hasattr(state, 'checkpoint') and isinstance(state.checkpoint, dict):
+                        messages = state.checkpoint.get("channel_values", {}).get("messages", [])
                         
-                        threads.append({
-                            "thread_id": thread_id,
-                            "last_updated": row['last_updated'].isoformat() if row['last_updated'] else "",
-                            "summary": summary,
-                            "message_count": message_count
-                        })
+                        if messages:
+                            summary = self._extract_summary(messages)
+                            message_count = len([m for m in messages if hasattr(m, 'type') and m.type in ["human", "ai"]])
+                            
+                            threads.append({
+                                "thread_id": thread_id,
+                                # checkpoint_id 是一个类似于 uuid 的字符串，其中可能包含时间戳信息，也可以直接作为唯一标识
+                                "last_updated": row['last_updated'] if row['last_updated'] else "",
+                                "summary": summary,
+                                "message_count": message_count
+                            })
                 
                 return threads
                 
@@ -80,10 +86,13 @@ class ThreadHistoryService:
         try:
             state = await self.checkpointer.aget_tuple({"configurable": {"thread_id": thread_id}})
             
-            if state is None or not state.values:
+            if state is None:
                 return []
             
-            messages = state.values.get("messages", [])
+            messages = state.checkpoint.get("channel_values", {}).get("messages", []) if hasattr(state, 'checkpoint') and isinstance(state.checkpoint, dict) else []
+            
+            if not messages:
+                return []
             
             # 转换 LangChain 消息对象为字典
             result = []
diff --git a/app/memory/mem0_client.py b/app/memory/mem0_client.py
index eec47d6..60a4274 100644
--- a/app/memory/mem0_client.py
+++ b/app/memory/mem0_client.py
@@ -3,142 +3,151 @@ Mem0 记忆层客户端封装模块
 负责 Mem0 的初始化、检索和存储
 """
 
-import os
+import asyncio
 from typing import Optional, List, Dict, Any
 from mem0 import AsyncMemory
 
-# 本地模块
-from app.config import QDRANT_URL, QDRANT_COLLECTION_NAME, VLLM_EMBEDDING_URL
+from app.config import QDRANT_URL, QDRANT_COLLECTION_NAME, LLAMACPP_EMBEDDING_URL, LLAMACPP_API_KEY
 from app.logger import info, warning, error
 
 
 class Mem0Client:
     """Mem0 异步客户端封装类"""
-    
+
     def __init__(self, llm_instance):
         """
         初始化 Mem0 客户端
-        
+
         Args:
             llm_instance: LangChain LLM 实例（用于事实提取）
         """
         self.llm = llm_instance
         self.mem0: Optional[AsyncMemory] = None
         self._initialized = False
-    
+
     async def initialize(self):
-        """异步初始化 Mem0 客户端"""
+        """异步初始化 Mem0 客户端，并进行实际连接测试"""
         if self._initialized:
             return
-        
-        try:
-            # 检查 Qdrant 是否可达 (可选)
-            import requests
-            try:
-                resp = requests.get(f"{QDRANT_URL}/collections", timeout=2)
-                if resp.status_code == 200:
-                    info(f"✅ Qdrant 服务正常: {QDRANT_URL}")
-            except Exception:
-                warning(f"⚠️ 无法连接到 Qdrant: {QDRANT_URL}，Mem0 将尝试自动连接")
 
+        try:
+            # Mem0 配置
             config = {
-                # 向量存储：复用 Qdrant 实例
                 "vector_store": {
                     "provider": "qdrant",
                     "config": {
+                        "url": QDRANT_URL,                     # 直接使用完整 URL
                         "collection_name": QDRANT_COLLECTION_NAME,
-                        "host": QDRANT_URL.split("://")[1].split(":")[0] if "://" in QDRANT_URL else "localhost",
-                        "port": int(QDRANT_URL.split(":")[-1]) if ":" in QDRANT_URL.split("://")[-1] else 6333,
-                        "embedding_model_dims": 768,  # embeddinggemma-300m 输出 768 维
+                        "embedding_model_dims": 768,
                     }
                 },
-                # 事实提取 LLM：直接复用传入的 LangChain 实例
                 "llm": {
                     "provider": "langchain",
                     "config": {
-                        "model": self.llm  # 直接传入 LangChain 模型实例
+                        "model": self.llm
                     }
                 },
-                # Embedding：指向 vLLM 服务
                 "embedder": {
                     "provider": "openai",
-                    "embedding_dims": 768,  # 关键：将维度参数提升到顶层
                     "config": {
-                        "model": "google/embeddinggemma-300m",
-                        "api_key": "EMPTY",
-                        "api_base": VLLM_EMBEDDING_URL,
-                        # 注意：不要在此处传递 dimensions 参数，避免与 vLLM v0.7.2 不兼容
-                    }
+                        "model": "embeddinggemma-300M-Q8_0",
+                        "api_key": LLAMACPP_API_KEY,
+                        "openai_base_url": LLAMACPP_EMBEDDING_URL,
+                    },
                 },
                 "version": "v1.1"
             }
 
             self.mem0 = AsyncMemory.from_config(config)
-            self._initialized = True
-            info(f"✅ Mem0 初始化成功 (Embedding: vLLM@8002, Vector: Qdrant, LLM: 复用现有实例)")
+            info("✅ Mem0 配置加载成功，开始连接测试...")
 
-        except Exception as e:
-            error(f"❌ Mem0 初始化失败: {e}")
-            import traceback
-            traceback.print_exc()
+            # 实际连接测试：调用一次 search 确保 Qdrant 和 Embedding 都可达
+            await asyncio.wait_for(
+                self.mem0.search("ping", user_id="test", limit=1),
+                timeout=60.0
+            )
+            info("✅ Mem0 实际连接测试成功，初始化完成")
+            self._initialized = True
+
+        except asyncio.TimeoutError:
+            error("❌ Mem0 连接测试超时 (60s)，请检查 Qdrant 或 Embedding 服务响应")
             self.mem0 = None
-    
+            self._initialized = False
+        except Exception as e:
+            error(f"❌ Mem0 初始化或连接测试失败: {e}")
+            import traceback
+            error(f"详细错误信息:\n{traceback.format_exc()}")
+            self.mem0 = None
+            self._initialized = False
+
     async def search_memories(self, query: str, user_id: str, limit: int = 5) -> List[str]:
         """
         检索相关记忆
-        
+
         Args:
             query: 查询文本
             user_id: 用户 ID
             limit: 返回结果数量限制
-            
+
         Returns:
             List[str]: 记忆事实列表
         """
         if not self.mem0:
             warning("⚠️ Mem0 未初始化，跳过记忆检索")
             return []
-        
+
         try:
-            memories = await self.mem0.search(query, user_id=user_id, limit=limit)
-            
+            memories = await asyncio.wait_for(
+                self.mem0.search(query, user_id=user_id, limit=limit),
+                timeout=30.0
+            )
+
             if memories and "results" in memories:
                 facts = [m["memory"] for m in memories["results"] if m.get("memory")]
                 if facts:
                     info(f"🔍 [记忆检索] Mem0 返回 {len(facts)} 条记忆")
                     return facts
-            
+
             info("🔍 [记忆检索] 未找到相关记忆")
             return []
-            
+
+        except asyncio.TimeoutError:
+            warning("⚠️ Mem0 检索超时 (30s)，跳过本次记忆检索")
+            return []
         except Exception as e:
             warning(f"⚠️ Mem0 检索失败: {e}")
             return []
-    
+
     async def add_memories(self, messages: List[Dict[str, str]], user_id: str) -> bool:
         """
         添加记忆（自动提取事实并存储）
-        
+
         Args:
             messages: 消息列表，格式为 [{"role": "user/assistant/system", "content": "..."}]
             user_id: 用户 ID
-            
+
         Returns:
             bool: 是否成功
         """
         if not self.mem0:
             warning("⚠️ Mem0 未初始化，跳过记忆添加")
             return False
-        
+
         try:
-            result = await self.mem0.add(
-                messages,
-                user_id=user_id,
-                metadata={"type": "conversation"}
+            await asyncio.wait_for(
+                self.mem0.add(
+                    messages,
+                    user_id=user_id,
+                    metadata={"type": "conversation"}
+                ),
+                timeout=60.0
             )
-            info(f"📝 [记忆添加] 已提交给 Mem0 进行事实提取")
+            info("📝 [记忆添加] 已提交给 Mem0 进行事实提取")
             return True
-            
+
+        except asyncio.TimeoutError:
+            error("❌ Mem0 记忆添加超时 (60s)")
+            return False
         except Exception as e:
             error(f"❌ Mem0 记忆添加失败: {e}")
-            return False
+            return False
\ No newline at end of file
diff --git a/app/nodes/__init__.py b/app/nodes/__init__.py
index 371a973..8d279db 100644
--- a/app/nodes/__init__.py
+++ b/app/nodes/__init__.py
@@ -7,6 +7,7 @@ from app.nodes.llm_call import create_llm_call_node
 from app.nodes.tool_call import create_tool_call_node
 from app.nodes.retrieve_memory import create_retrieve_memory_node
 from app.nodes.summarize import create_summarize_node
+from app.nodes.finalize import finalize_node
 
 __all__ = [
     "should_continue",
@@ -14,4 +15,5 @@ __all__ = [
     "create_tool_call_node",
     "create_retrieve_memory_node",
     "create_summarize_node",
+    "finalize_node",
 ]
diff --git a/app/nodes/finalize.py b/app/nodes/finalize.py
new file mode 100644
index 0000000..a283587
--- /dev/null
+++ b/app/nodes/finalize.py
@@ -0,0 +1,47 @@
+"""
+完成事件节点模块
+负责发送完成事件，包含token使用情况和耗时信息
+"""
+
+from typing import Any, Dict
+from langgraph.runtime import Runtime
+from langgraph.config import get_stream_writer
+
+# 本地模块
+from app.state import MessagesState, GraphContext
+from app.utils.logging import log_state_change
+from app.logger import info, error
+
+
+from langchain_core.runnables.config import RunnableConfig
+
+async def finalize_node(state: MessagesState, config: RunnableConfig) -> Dict[str, Any]:
+    """
+    完成事件节点 - 发送完成事件，包含token使用情况和耗时信息
+    
+    Args:
+        state: 当前对话状态
+        config: 运行时配置
+        
+    Returns:
+        空字典（完成节点，无状态更新）
+    """
+    log_state_change("finalize", state, "进入")
+
+    try:
+        # 获取流式写入器并发送完成事件
+        writer = get_stream_writer()
+        writer({
+            "type": "custom", 
+            "data": {
+                "type": "done",
+                "token_usage": state.get("last_token_usage", {}),
+                "elapsed_time": state.get("last_elapsed_time", 0.0)
+            }
+        })
+        info("🏁 [完成事件] 已发送完成事件，包含token使用情况和耗时信息")
+    except Exception as e:
+        error(f"❌ [完成事件] 发送完成事件时发生异常: {e}")
+    
+    log_state_change("finalize", state, "离开")
+    return {}
\ No newline at end of file
diff --git a/app/nodes/llm_call.py b/app/nodes/llm_call.py
index 78a3e7f..79361cd 100644
--- a/app/nodes/llm_call.py
+++ b/app/nodes/llm_call.py
@@ -32,15 +32,19 @@ def create_llm_call_node(llm: BaseLLM, tools: list):
     # 构建调用链
     prompt = create_system_prompt()
     llm_with_tools = llm.bind_tools(tools)
-    chain = prompt | RunnableLambda(print_llm_input) | llm_with_tools
     
-    async def call_llm(state: MessagesState, runtime: Runtime[GraphContext]) -> Dict[str, Any]:
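+    # Build the chain as prompt | llm_with_tools (the print_llm_input RunnableLambda is no longer part of it); call_llm below iterates it with astream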
+    chain = prompt  | llm_with_tools
+    
+    from langchain_core.runnables.config import RunnableConfig
+    
+    async def call_llm(state: MessagesState, config: RunnableConfig) -> Dict[str, Any]:
         """
         LLM 调用节点（异步方法）
         
         Args:
             state: 当前对话状态
-            runtime: LangGraph 运行时上下文
+            config: LangChain/LangGraph 自动注入的配置，包含 callbacks 等信息
             
         Returns:
             更新后的状态字典
@@ -48,17 +52,28 @@ def create_llm_call_node(llm: BaseLLM, tools: list):
         log_state_change("llm_call", state, "进入")
         
         memory_context = state.get("memory_context", "暂无用户信息")
-        loop = asyncio.get_event_loop()
         start_time = time.time()
         
         try:
-            response = await loop.run_in_executor(
-                None, 
-                lambda: chain.invoke({
+            # 恢复为：手动进行 astream，并将所有的 chunk 拼接成最终的 response 返回。
+            # LangGraph 会自动监听这期间产生的所有 token。
+            chunks = []
+            async for chunk in chain.astream(
+                {
                     "messages": state["messages"],
                     "memory_context": memory_context
-                })
-            )
+                },
+                config=config
+            ):
+                chunks.append(chunk)
+
+            # 将所有 chunk 合并成最终的 AIMessage
+            if chunks:
+                response = chunks[0]
+                for chunk in chunks[1:]:
+                    response = response + chunk
+            else:
+                response = AIMessage(content="")
             
             elapsed_time = time.time() - start_time
             
@@ -85,13 +100,7 @@ def create_llm_call_node(llm: BaseLLM, tools: list):
             if token_usage:
                 input_tokens = token_usage.get('prompt_tokens', token_usage.get('input_tokens', 0))
                 output_tokens = token_usage.get('completion_tokens', token_usage.get('output_tokens', 0))
-            
-            # 打印响应统计信息
-            info(f"⏱️  [LLM统计] 调用耗时: {elapsed_time:.2f}秒")
-            info(f"📊 [LLM统计] Token用量: 输入={input_tokens}, 输出={output_tokens}, 总计={input_tokens + output_tokens}")
-            if token_usage:
-                debug(f"📋 [LLM统计] 详细用量: {token_usage}")
-            
+           
             # 打印 LLM 的完整输出
             debug("\n" + "="*80)
             debug("📥 [LLM输出] 大模型返回的完整响应:")
@@ -99,6 +108,12 @@ def create_llm_call_node(llm: BaseLLM, tools: list):
             debug(f"   内容长度: {len(str(response.content))} 字符")
             debug("-"*80)
             debug(f"{response.content}")
+
+            # 打印响应统计信息
+            info(f"⏱️  [LLM统计] 调用耗时: {elapsed_time:.2f}秒")
+            info(f"📊 [LLM统计] Token用量: 输入={input_tokens}, 输出={output_tokens}, 总计={input_tokens + output_tokens}")
+            if token_usage:
+                debug(f"📋 [LLM统计] 详细用量: {token_usage}")
             debug("="*80 + "\n")
             
             result = {
diff --git a/app/nodes/retrieve_memory.py b/app/nodes/retrieve_memory.py
index 4f63b65..0313ca8 100644
--- a/app/nodes/retrieve_memory.py
+++ b/app/nodes/retrieve_memory.py
@@ -24,20 +24,23 @@ def create_retrieve_memory_node(mem0_client: Mem0Client):
         异步节点函数
     """
     
-    async def retrieve_memory(state: MessagesState, runtime: Runtime[GraphContext]) -> Dict[str, Any]:
+    from langchain_core.runnables.config import RunnableConfig
+    
+    async def retrieve_memory(state: MessagesState, config: RunnableConfig) -> Dict[str, Any]:
         """
         记忆检索节点 - 使用 Mem0
         
         Args:
             state: 当前对话状态
-            runtime: LangGraph 运行时上下文
+            config: 运行时配置
             
         Returns:
             包含 memory_context 的状态更新
         """
         log_state_change("retrieve_memory", state, "进入")
 
-        user_id = runtime.context.user_id
+        # 从 metadata 中获取 user_id
+        user_id = config.get("metadata", {}).get("user_id", "default_user")
         
         # 兼容 dict 和对象两种消息格式
         last_msg = state["messages"][-1]
diff --git a/app/nodes/router.py b/app/nodes/router.py
index 3dad4d0..81d2e7e 100644
--- a/app/nodes/router.py
+++ b/app/nodes/router.py
@@ -12,7 +12,7 @@ from app.state import MessagesState
 from app.logger import info
 
 
-def should_continue(state: MessagesState) -> Literal['tool_node', 'summarize', 'END']:
+def should_continue(state: MessagesState) -> Literal['tool_node', 'summarize', 'finalize']:
     """
     决定下一步：工具调用、生成摘要还是结束
     
@@ -20,7 +20,7 @@ def should_continue(state: MessagesState) -> Literal['tool_node', 'summarize', '
         state: 当前对话状态
         
     Returns:
-        下一个节点名称或 END
+        下一个节点名称
     """
     last_message = state["messages"][-1]
 
@@ -40,9 +40,9 @@ def should_continue(state: MessagesState) -> Literal['tool_node', 'summarize', '
         else:
             if ENABLE_GRAPH_TRACE:
                 info(f"🔀 [路由决策] 收到 AI 最终回复，未达摘要阈值({turns}/{MEMORY_SUMMARIZE_INTERVAL}) → 结束流程")
-            return 'END'
+            return 'finalize'
 
     # 3. 其他情况（如只有用户消息）直接结束
     if ENABLE_GRAPH_TRACE:
         info(f"🔀 [路由决策] 非 AI 消息（如纯用户消息） → 结束流程")
-    return 'END'
+    return 'finalize'
diff --git a/app/nodes/summarize.py b/app/nodes/summarize.py
index bbf68c8..a49742a 100644
--- a/app/nodes/summarize.py
+++ b/app/nodes/summarize.py
@@ -24,13 +24,15 @@ def create_summarize_node(mem0_client: Mem0Client):
         异步节点函数
     """
     
-    async def summarize_conversation(state: MessagesState, runtime: Runtime[GraphContext]) -> Dict[str, Any]:
+    from langchain_core.runnables.config import RunnableConfig
+    
+    async def summarize_conversation(state: MessagesState, config: RunnableConfig) -> Dict[str, Any]:
         """
         记忆存储节点 - 使用 Mem0
         
         Args:
             state: 当前对话状态
-            runtime: LangGraph 运行时上下文
+            config: 运行时配置
             
         Returns:
             重置计数器的状态更新
@@ -42,7 +44,8 @@ def create_summarize_node(mem0_client: Mem0Client):
             debug("📝 [记忆添加] 对话过短，跳过")
             return {"turns_since_last_summary": 0}
 
-        user_id = runtime.context.user_id
+        # 从 metadata 中获取 user_id
+        user_id = config.get("metadata", {}).get("user_id", "default_user")
 
         # 确保 Mem0 已初始化（懒加载）
         if not mem0_client._initialized:
@@ -83,4 +86,4 @@ def create_summarize_node(mem0_client: Mem0Client):
         log_state_change("summarize", state, "离开")
         return {"turns_since_last_summary": 0}
     
-    return summarize_conversation
+    return summarize_conversation
\ No newline at end of file
diff --git a/app/nodes/tool_call.py b/app/nodes/tool_call.py
index 348abc2..12648ff 100644
--- a/app/nodes/tool_call.py
+++ b/app/nodes/tool_call.py
@@ -7,6 +7,7 @@ import asyncio
 from typing import Any, Dict
 from langchain_core.messages import AIMessage, ToolMessage
 from langgraph.runtime import Runtime
+from langgraph.config import get_stream_writer
 
 # 本地模块
 from app.state import MessagesState, GraphContext
@@ -25,13 +26,15 @@ def create_tool_call_node(tools_by_name: Dict[str, Any]):
         异步节点函数
     """
     
-    async def call_tools(state: MessagesState, runtime: Runtime[GraphContext]) -> Dict[str, Any]:
+    from langchain_core.runnables.config import RunnableConfig
+    
+    async def call_tools(state: MessagesState, config: RunnableConfig) -> Dict[str, Any]:
         """
         工具执行节点（异步方法）
         
         Args:
             state: 当前对话状态
-            runtime: LangGraph 运行时上下文
+            config: 运行时配置
             
         Returns:
             包含 ToolMessage 的状态更新
@@ -62,6 +65,10 @@ def create_tool_call_node(tools_by_name: Dict[str, Any]):
                 results.append(ToolMessage(content=err_msg, tool_call_id=tool_id))
                 continue
 
+            # 获取流式写入器并发送工具调用开始事件
+            writer = get_stream_writer()
+            writer({"type": "custom", "data": {"type": "tool_start", "tool": tool_name}})
+            
             try:
                 # 修复闭包问题：将变量作为默认参数传入 lambda
                 # 如果工具支持异步 (ainvoke)，优先使用异步调用
@@ -77,9 +84,15 @@ def create_tool_call_node(tools_by_name: Dict[str, Any]):
                 result_preview = str(observation).replace("\n", " ")
                 debug(f"   └─ ✅ 结果: {result_preview}")
                 results.append(ToolMessage(content=str(observation), tool_call_id=tool_id))
+                
+                # 发送工具调用完成事件
+                writer({"type": "custom", "data": {"type": "tool_end", "tool": tool_name, "success": True}})
             except Exception as e:
                 debug(f"   └─ ❌ 异常: {e}")
                 results.append(ToolMessage(content=f"Error: {e}", tool_call_id=tool_id))
+                
+                # 发送工具调用失败事件
+                writer({"type": "custom", "data": {"type": "tool_end", "tool": tool_name, "success": False, "error": str(e)}})
 
         info(f"🛠️  [工具调用] 执行完成，返回 {len(results)} 条 ToolMessage")
         
@@ -87,4 +100,4 @@ def create_tool_call_node(tools_by_name: Dict[str, Any]):
         log_state_change("tool_node", {**state, **result}, "离开")
         return result
     
-    return call_tools
+    return call_tools
\ No newline at end of file
diff --git a/app/prompts.py b/app/prompts.py
index f585539..f0f2c74 100644
--- a/app/prompts.py
+++ b/app/prompts.py
@@ -27,9 +27,10 @@ def create_system_prompt() -> ChatPromptTemplate:
         "- 抓取网页内容：`fetch_webpage_content`\n"
         "工具调用时请直接返回所需参数，无需额外说明。\n\n"
         "【回答要求（必须遵守）】\n"
-        "1. 回答必须简洁、直接，禁止描述任何思考过程或内心活动。\n"
-        "2. 优先利用已知用户信息进行个性化回复。\n"
-        "3. 若无信息可依，礼貌询问或提供通用帮助。"
+        "1. 回答必须简洁、直接。\n"
+        "2. 如果你认为该问题需要进行深入的推理或思考，请务必将你的思维链或推理过程用 `<think>` 和 `</think>` 标签包裹起来，放在回答的最前面。例如：<think>这里是我的思考过程...</think>这里是最终回答。\n"
+        "3. 优先利用已知用户信息进行个性化回复。\n"
+        "4. 若无信息可依，礼貌询问或提供通用帮助。"
     )
     
     return ChatPromptTemplate.from_messages([
diff --git a/app/utils/logging.py b/app/utils/logging.py
index 659303c..8228366 100644
--- a/app/utils/logging.py
+++ b/app/utils/logging.py
@@ -25,10 +25,10 @@ def log_state_change(node_name: str, state: dict, prefix: str = "进入"):
     if last_msg:
         # 兼容 dict 和对象两种格式
         if isinstance(last_msg, dict):
-            content_preview = str(last_msg.get("content", ""))[:100].replace("\n", " ")
+            content_preview = str(last_msg.get("content", ""))[:10].replace("\n", " ")
             msg_type = last_msg.get("type", "unknown")
         else:
-            content_preview = str(last_msg.content)[:100].replace("\n", " ")
+            content_preview = str(last_msg.content)[:10].replace("\n", " ")
             msg_type = getattr(last_msg, 'type', 'unknown')
         last_info = f"{msg_type.upper()}: {content_preview}"
     info(f"🔄 [{node_name}] {prefix} | 消息数:{msg_count} | 最后一条:{last_info}")
diff --git a/docker/Dockerfile.backend b/docker/Dockerfile.backend
index 38cdda6..f322235 100644
--- a/docker/Dockerfile.backend
+++ b/docker/Dockerfile.backend
@@ -8,8 +8,8 @@ WORKDIR /app
 ENV PYTHONPATH=/app
 
 # llama.cpp 服务配置（本地部署标准端口）
-ENV VLLM_BASE_URL=http://localhost:8081/v1
-ENV VLLM_EMBEDDING_URL=http://localhost:8082/v1
+ENV VLLM_BASE_URL=http://host.docker.internal:18000/v1
+ENV LLAMACPP_EMBEDDING_URL=http://host.docker.internal:18001/v1
 
 # Mem0 记忆层配置
 ENV QDRANT_COLLECTION_NAME=mem0_user_memories
diff --git a/docker/Dockerfile.frontend b/docker/Dockerfile.frontend
index c5ccefa..4d3f112 100644
--- a/docker/Dockerfile.frontend
+++ b/docker/Dockerfile.frontend
@@ -12,4 +12,4 @@ ENV PYTHONPATH=/app
 
 EXPOSE 8501
 
-CMD ["streamlit", "run", "frontend/frontend.py", "--server.port", "8501", "--server.address", "0.0.0.0", "--server.baseUrlPath", "/ai"]
+CMD ["streamlit", "run", "frontend/frontend_main.py", "--server.port", "8501", "--server.address", "0.0.0.0", "--server.baseUrlPath", "/ai"]
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index 6f3fe25..74980b7 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -12,19 +12,19 @@ services:
       - ZHIPUAI_API_KEY=${ZHIPUAI_API_KEY}
       - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY}
       - LLAMACPP_API_KEY=${LLAMACPP_API_KEY}
-      
+
       # ⭐ 日志调试配置：通过 .env 注入（支持灵活调整）
       - LOG_LEVEL=${LOG_LEVEL:-WARNING}
       - DEBUG=${DEBUG:-false}
       - ENABLE_GRAPH_TRACE=${ENABLE_GRAPH_TRACE:-false}
-      
+
       # ⭐ 基础设施配置：固化在 compose 文件中
       # PostgreSQL 连接（远程服务器）
-      - DB_URI=postgresql://postgres:mysecretpassword@115.190.121.151:5432/langgraph_db?sslmode=disable
-      
+      - DB_URI=postgresql://postgres:huang1998@115.190.121.151:5432/langgraph_db?sslmode=disable
+
       # Qdrant 向量数据库（远程服务器）
       - QDRANT_URL=http://115.190.121.151:6333
-      
+
       # 前端通信地址（Docker 内部网络）
       - API_URL=http://backend:8083/chat
     volumes:
@@ -60,4 +60,4 @@ networks:
 # ⭐ PostgreSQL 和 Qdrant 已迁移到远程服务器，不再需要本地卷
 # volumes:
 #   pg_data:
-#   qdrant_storage:
\ No newline at end of file
+#   qdrant_storage:
diff --git a/frontend/README.md b/frontend/README.md
deleted file mode 100644
index d6edf1e..0000000
--- a/frontend/README.md
+++ /dev/null
@@ -1,246 +0,0 @@
-# ✨ 前端模块化重构总结
-
-## 📊 重构成果
-
-### 文件结构对比
-
-#### 重构前
-```
-frontend/
-└── frontend.py          # 280+ 行单体文件
-```
-
-#### 重构后
-```
-frontend/
-├── __init__.py          # 包初始化
-├── frontend.py          # 主入口（48 行）
-├── config.py            # 配置管理（62 行）
-├── state.py             # 状态管理（120 行）
-├── api_client.py        # API 客户端（164 行）
-├── utils.py             # 工具函数（56 行）
-├── components/
-│   ├── __init__.py
-│   ├── sidebar.py       # 左侧栏（156 行）
-│   ├── chat_area.py     # 中间栏（156 行）
-│   └── info_panel.py    # 右侧栏（63 行）
-└── REFACTOR.md          # 重构文档
-```
-
----
-
-## 🎯 核心改进
-
-### 1. **代码量优化**
-
-| 模块 | 行数 | 说明 |
-|------|------|------|
-| [frontend.py](file:///home/huang/Study/AIProject/Agent1/frontend/frontend.py) | 48 行 | ✅ -83%（原 280+ 行） |
-| [config.py](file:///home/huang/Study/AIProject/Agent1/frontend/config.py) | 62 行 | 新增配置管理 |
-| [state.py](file:///home/huang/Study/AIProject/Agent1/frontend/state.py) | 120 行 | 新增状态管理 |
-| [api_client.py](file:///home/huang/Study/AIProject/Agent1/frontend/api_client.py) | 164 行 | 新增 API 客户端 |
-| [components/sidebar.py](file:///home/huang/Study/AIProject/Agent1/frontend/components/sidebar.py) | 156 行 | 左侧栏组件 |
-| [components/chat_area.py](file:///home/huang/Study/AIProject/Agent1/frontend/components/chat_area.py) | 156 行 | 中间聊天区 |
-| [components/info_panel.py](file:///home/huang/Study/AIProject/Agent1/frontend/components/info_panel.py) | 63 行 | 右侧信息面板 |
-
-**总计**：769 行（模块化后），平均每个文件 < 110 行
-
----
-
-### 2. **架构设计**
-
-#### 分层架构
-```
-┌─────────────────────────────────────┐
-│    表现层 (Components)               │  ← UI 渲染
-│    sidebar, chat_area, info_panel    │
-├─────────────────────────────────────┤
-│    业务层 (State)                    │  ← 状态管理
-│    AppState 类                       │
-├─────────────────────────────────────┤
-│    数据层 (API Client)               │  ← 后端通信
-│    APIClient 类                      │
-├─────────────────────────────────────┤
-│    配置层 (Config)                   │  ← 配置管理
-│    FrontendConfig 数据类              │
-└─────────────────────────────────────┘
-```
-
-#### 依赖关系
-```
-Components → State → API Client → Config
-     ↑                        ↓
-     └──────── 全局单例 ────────┘
-```
-
----
-
-### 3. **设计模式应用**
-
-| 模式 | 应用场景 | 优势 |
-|------|---------|------|
-| **单例模式** | `config`, `api_client` 全局实例 | 避免重复初始化 |
-| **外观模式** | [AppState](file:///home/huang/Study/AIProject/Agent1/frontend/state.py#L11-L117) 封装 Session State | 统一状态操作接口 |
-| **模块模式** | `components/` 独立组件 | 职责单一，易于维护 |
-| **数据类** | [FrontendConfig](file:///home/huang/Study/AIProject/Agent1/frontend/config.py#L13-L66) 配置管理 | 类型安全，IDE 友好 |
-
----
-
-## 🚀 使用方式
-
-### 本地开发
-```bash
-# 启动前后端
-./scripts/start.sh both
-
-# 访问前端
-open http://localhost:8501
-```
-
-### Docker 部署
-```bash
-# 配置环境变量
-cp .env.docker .env
-# 编辑 .env 填入 API Key
-
-# 启动服务
-cd docker
-docker compose up -d
-```
-
----
-
-## 📝 扩展示例
-
-### 示例 1：添加对话导出功能
-
-只需修改 [components/sidebar.py](file:///home/huang/Study/AIProject/Agent1/frontend/components/sidebar.py)：
-
-```python
-def _render_history_actions():
-    """渲染历史操作按钮"""
-    if st.button("🔄 刷新列表", use_container_width=True):
-        _refresh_threads()
-    
-    if st.button("➕ 新对话", type="primary", use_container_width=True):
-        AppState.start_new_thread()
-        st.rerun()
-    
-    # 新增：导出按钮
-    if st.button("📤 导出对话", use_container_width=True):
-        _export_conversation()
-
-def _export_conversation():
-    """导出当前对话"""
-    messages = AppState.get_messages()
-    content = "\n\n".join([
-        f"**{m['role'].upper()}**: {m['content']}" 
-        for m in messages
-    ])
-    st.download_button(
-        label="下载 Markdown",
-        data=content,
-        file_name="conversation.md",
-        mime="text/markdown"
-    )
-```
-
-**影响范围**：仅修改 `sidebar.py`，不影响其他模块！
-
----
-
-### 示例 2：添加暗色主题
-
-修改 [config.py](file:///home/huang/Study/AIProject/Agent1/frontend/config.py)：
-
-```python
-@dataclass
-class FrontendConfig:
-    # ... 现有配置 ...
-    theme: str = "light"  # 新增主题配置
-
-# 在 frontend.py 中应用
-if config.theme == "dark":
-    st.markdown("""
-        <style>
-        .stApp { background-color: #0e1117; }
-        </style>
-    """, unsafe_allow_html=True)
-```
-
----
-
-### 示例 3：添加消息统计图表
-
-修改 [components/info_panel.py](file:///home/huang/Study/AIProject/Agent1/frontend/components/info_panel.py)：
-
-```python
-def _render_message_stats():
-    """渲染消息统计"""
-    st.subheader("消息统计")
-    
-    stats = AppState.get_message_stats()
-    
-    # 新增：柱状图
-    import pandas as pd
-    df = pd.DataFrame({
-        '角色': ['用户', 'AI'],
-        '数量': [stats['user'], stats['assistant']]
-    })
-    st.bar_chart(df.set_index('角色'))
-```
-
----
-
-## ✅ 重构优势
-
-### 1. **可维护性** ⭐⭐⭐⭐⭐
-- 每个文件职责单一，平均 < 110 行
-- 修改功能只需改对应模块
-- 代码结构清晰，易于理解
-
-### 2. **可扩展性** ⭐⭐⭐⭐⭐
-- 新增功能不影响现有代码
-- 组件独立，可自由组合
-- 支持插件化开发
-
-### 3. **可测试性** ⭐⭐⭐⭐⭐
-- 各模块独立，便于 Mock
-- 状态管理统一，易于验证
-- API 客户端可独立测试
-
-### 4. **代码质量** ⭐⭐⭐⭐⭐
-- 遵循 SOLID 原则
-- 类型提示完整
-- 符合 Clean Architecture
-
-### 5. **团队协作** ⭐⭐⭐⭐⭐
-- 多人并行开发不同组件
-- 减少代码冲突
-- 降低 Review 难度
-
----
-
-## 📚 文档资源
-
-| 文档 | 说明 |
-|------|------|
-| [frontend/REFACTOR.md](file:///home/huang/Study/AIProject/Agent1/frontend/REFACTOR.md) | 详细重构说明和架构设计 |
-| [FEATURES.md](file:///home/huang/Study/AIProject/Agent1/FEATURES.md) | 功能使用说明 |
-| [README.md](file:///home/huang/Study/AIProject/Agent1/README.md) | 项目总体说明 |
-
----
-
-## 🎉 总结
-
-本次重构将前端从 **280+ 行单体文件** 改造为 **模块化分层架构**，实现了：
-
-✅ **代码精简**：主文件从 280+ 行降至 48 行（-83%）  
-✅ **模块化**：拆分为 7 个独立模块，平均 < 110 行  
-✅ **分层架构**：表现层 → 业务层 → 数据层 → 配置层  
-✅ **类型安全**：使用 dataclass 和类型提示  
-✅ **易于扩展**：新增功能只需修改对应模块  
-✅ **易于测试**：各模块独立，便于 Mock 和单元测试  
-✅ **团队协作**：减少代码冲突，降低 Review 难度  
-
-**前端架构已与后端保持一致的优雅设计！** 🎊
\ No newline at end of file
diff --git a/frontend/REFACTOR.md b/frontend/REFACTOR.md
deleted file mode 100644
index 1675890..0000000
--- a/frontend/REFACTOR.md
+++ /dev/null
@@ -1,289 +0,0 @@
-# 🏗️ 前端重构说明
-
-## 重构目标
-
-将原来的单体 `frontend.py`（280+ 行）拆分为模块化、可维护的架构，参考后端的分层设计模式。
-
----
-
-## 📁 新架构
-
-```
-frontend/
-├── __init__.py              # 包初始化
-├── frontend.py              # 主入口（50 行，仅负责组装）
-├── config.py                # 配置管理（数据类 + 环境变量）
-├── state.py                 # 状态管理（统一 Session State 操作）
-├── api_client.py            # API 客户端（封装所有后端通信）
-├── utils.py                 # 工具函数（通用辅助函数）
-└── components/              # UI 组件
-    ├── __init__.py
-    ├── sidebar.py           # 左侧栏：用户登录 + 历史列表
-    ├── chat_area.py         # 中间栏：聊天区域 + 流式响应
-    └── info_panel.py        # 右侧栏：信息面板
-```
-
----
-
-## 🎯 核心模块说明
-
-### 1. **配置管理** (`config.py`)
-
-**设计理念**：使用 Python `dataclass` 集中管理所有配置，支持环境变量覆盖。
-
-```python
-@dataclass
-class FrontendConfig:
-    api_base: str = ""
-    page_title: str = "AI 个人助手"
-    default_model: str = "zhipu"
-    history_limit: int = 50
-    # ... 其他配置
-
-# 全局配置实例
-config = FrontendConfig()
-```
-
-**优势**：
-- ✅ 类型安全（dataclass 自动类型检查）
-- ✅ 集中管理（所有配置在一处）
-- ✅ 易于测试（可轻松 mock 配置）
-- ✅ 环境变量支持（`__post_init__` 中加载）
-
----
-
-### 2. **状态管理** (`state.py`)
-
-**设计理念**：封装所有 `st.session_state` 操作，提供统一的 API。
-
-```python
-class AppState:
-    @staticmethod
-    def init():
-        """初始化所有状态"""
-        if "user_id" not in st.session_state:
-            st.session_state.user_id = config.default_user_id
-        # ...
-    
-    @staticmethod
-    def login(username: str):
-        """用户登录"""
-        st.session_state.user_id = username.strip()
-        st.session_state.logged_in = True
-    
-    @staticmethod
-    def get_messages() -> List[Dict[str, str]]:
-        """获取消息列表"""
-        return st.session_state.messages
-```
-
-**优势**：
-- ✅ 统一接口（所有状态操作通过 AppState）
-- ✅ 类型提示（IDE 自动补全）
-- ✅ 易于维护（状态逻辑集中）
-- ✅ 避免魔法字符串（不再直接使用 `st.session_state["xxx"]`）
-
----
-
-### 3. **API 客户端** (`api_client.py`)
-
-**设计理念**：封装所有与后端的通信，支持流式响应。
-
-```python
-class APIClient:
-    def get_user_threads(self, user_id: str, limit: int) -> List[Dict]:
-        """获取用户历史列表"""
-        resp = requests.get(f"{self.base_url}/threads", ...)
-        return resp.json().get("threads", [])
-    
-    def chat_stream(self, message: str, ...) -> AsyncGenerator[Dict, None]:
-        """流式对话"""
-        with requests.post(..., stream=True) as response:
-            for line in response.iter_lines():
-                yield json.loads(line)
-```
-
-**优势**：
-- ✅ 职责单一（仅负责 API 通信）
-- ✅ 错误处理集中（统一的异常捕获）
-- ✅ 易于测试（可 mock APIClient）
-- ✅ 流式支持（Generator 逐行 yield）
-
----
-
-### 4. **UI 组件** (`components/`)
-
-**设计理念**：每个组件独立渲染，通过 State 和 API Client 交互。
-
-#### `sidebar.py` - 左侧栏
-```python
-def render_sidebar():
-    """渲染左侧栏"""
-    with st.sidebar:
-        _render_user_section()      # 用户登录
-        _render_history_section()   # 历史列表
-```
-
-#### `chat_area.py` - 中间聊天区
-```python
-def render_chat_area():
-    """渲染中间聊天区域"""
-    _render_model_selector()        # 模型选择
-    _render_chat_container()        # 消息显示
-    _render_input_box()             # 输入框 + 流式响应
-```
-
-#### `info_panel.py` - 右侧信息面板
-```python
-def render_info_panel():
-    """渲染右侧信息面板"""
-    _render_thread_info()           # 当前线程
-    _render_message_stats()         # 消息统计
-    _render_tips()                  # 使用提示
-```
-
-**优势**：
-- ✅ 组件独立（每个文件 < 150 行）
-- ✅ 职责清晰（一个组件一个文件）
-- ✅ 易于复用（可在其他页面复用组件）
-- ✅ 易于测试（可独立测试每个组件）
-
----
-
-### 5. **主入口** (`frontend.py`)
-
-**设计理念**：仅负责组装各模块，代码量 < 50 行。
-
-```python
-from .config import config
-from .state import AppState
-from .components.sidebar import render_sidebar
-from .components.chat_area import render_chat_area
-from .components.info_panel import render_info_panel
-
-st.set_page_config(...)
-AppState.init()
-
-def main():
-    st.title("🤖 个人生活与数据分析助手")
-    
-    col_sidebar, col_chat, col_info = st.columns([1, 3, 1])
-    
-    with col_sidebar:
-        render_sidebar()
-    with col_chat:
-        render_chat_area()
-    with col_info:
-        render_info_panel()
-
-if __name__ == "__main__":
-    main()
-```
-
-**优势**：
-- ✅ 极简主义（< 50 行）
-- ✅ 清晰结构（一眼看懂整体架构）
-- ✅ 易于维护（修改功能只需改对应组件）
-
----
-
-##  重构对比
-
-| 指标 | 重构前 | 重构后 | 改进 |
-|------|--------|--------|------|
-| **主文件行数** | 280+ 行 | 48 行 | ✅ -83% |
-| **代码结构** | 单体文件 | 模块化架构 | ✅ 分层清晰 |
-| **组件独立性** | 耦合严重 | 独立组件 | ✅ 可复用 |
-| **测试友好性** | 难以测试 | 易于 Mock | ✅ 可测试 |
-| **维护成本** | 高（改一处影响全局） | 低（改组件不影响其他） | ✅ 易维护 |
-| **代码可读性** | 差（滚动查找） | 优（模块化） | ✅ 易读 |
-
----
-
-## 🎨 架构设计模式
-
-### 1. **分层架构**
-```
-┌─────────────────────────────────────┐
-│         表现层 (Components)          │
-│  sidebar.py, chat_area.py, ...      │
-├─────────────────────────────────────┤
-│         业务层 (State)               │
-│  state.py - 状态管理                 │
-├─────────────────────────────────────┤
-│         数据层 (API Client)          │
-│  api_client.py - 后端通信            │
-├─────────────────────────────────────┤
-│         配置层 (Config)              │
-│  config.py - 配置管理                │
-└─────────────────────────────────────┘
-```
-
-### 2. **依赖方向**
-```
-Components → State → API Client → Config
-     ↑                        ↓
-     └────────────────────────┘
-         (全局单例实例)
-```
-
-**规则**：
-- ✅ 上层依赖下层
-- ✅ 禁止循环依赖
-- ✅ 配置和客户端为全局单例
-
----
-
-## 🚀 使用示例
-
-### 扩展新功能：添加对话导出按钮
-
-只需修改 `components/sidebar.py`：
-
-```python
-def _render_history_actions():
-    """渲染历史操作按钮"""
-    if st.button("🔄 刷新列表", use_container_width=True):
-        _refresh_threads()
-    
-    if st.button("➕ 新对话", type="primary", use_container_width=True):
-        AppState.start_new_thread()
-        st.rerun()
-    
-    # 新增：导出对话按钮
-    if st.button("📤 导出对话", use_container_width=True):
-        _export_current_thread()
-
-def _export_current_thread():
-    """导出当前对话为 Markdown"""
-    messages = AppState.get_messages()
-    content = "\n\n".join([f"**{m['role']}**: {m['content']}" for m in messages])
-    st.download_button("下载", content, "conversation.md")
-```
-
-**优势**：修改仅影响 `sidebar.py`，不影响其他模块！
-
----
-
-## ✅ 重构优势总结
-
-1. **模块化**：每个文件职责单一，易于理解和维护
-2. **可扩展**：添加新功能只需修改对应模块
-3. **可测试**：各模块独立，便于编写单元测试
-4. **可复用**：组件可在其他项目中复用
-5. **类型安全**：使用 dataclass 和类型提示
-6. **代码质量**：遵循 SOLID 原则和 Clean Architecture
-
----
-
-## 📝 后续优化建议
-
-1. **添加单元测试**：为 `state.py` 和 `api_client.py` 编写测试
-2. **错误边界**：在组件中添加 try-except，避免单个组件崩溃影响全局
-3. **性能优化**：使用 `st.cache_data` 缓存 API 响应
-4. **国际化**：提取所有文本到 `i18n.py`，支持多语言
-5. **主题支持**：添加暗色/亮色主题切换
-
----
-
-**🎉 前端重构完成！代码结构更清晰，维护成本大幅降低！**
\ No newline at end of file
diff --git a/frontend/components/chat_area.py b/frontend/components/chat_area.py
index e135e64..816192b 100644
--- a/frontend/components/chat_area.py
+++ b/frontend/components/chat_area.py
@@ -3,6 +3,7 @@
 包含模型选择、消息显示和输入框
 """
 
+import re
 import streamlit as st
 
 # 使用绝对导入
@@ -13,28 +14,30 @@ from frontend.config import config
 
 def render_chat_area():
     """渲染中间聊天区域"""
-    # 模型选择器
+    # 顶部：极简模型选择器（可选放在顶部中间）
     _render_model_selector()
     
-    st.divider()
+    # Blank spacer instead of a divider, to keep the layout visually clean
+    st.write("")
     
-    # 聊天容器
-    _render_chat_container()
+    # 渲染历史消息
+    _render_chat_history()
     
-    # 输入框
-    _render_input_box()
+    # 输入框和流式响应处理
+    _render_input_and_response()
 
 
 def _render_model_selector():
-    """渲染模型选择器"""
-    col_model, col_empty = st.columns([2, 3])
+    """渲染模型选择器，极简风格"""
+    col_empty1, col_model, col_empty2 = st.columns([1, 2, 1])
     
     with col_model:
         selected_model = st.selectbox(
-            "🧠 选择模型",
+            "选择模型",
             options=list(config.model_options.keys()),
             format_func=lambda x: config.model_options[x],
-            index=_get_model_index()
+            index=_get_model_index(),
+            label_visibility="collapsed" # 隐藏标签，只显示下拉框，更加现代
         )
         AppState.set_selected_model(selected_model)
 
@@ -51,45 +54,71 @@ def _get_model_index() -> int:
     return model_keys.index(current_model) if current_model in model_keys else 0
 
 
-def _render_chat_container():
-    """渲染聊天消息容器"""
-    chat_container = st.container(height=500)
-    
-    with chat_container:
-        messages = AppState.get_messages()
-        for msg in messages:
-            with st.chat_message(msg["role"]):
-                st.markdown(msg["content"])
+def _render_chat_history():
+    """渲染历史聊天消息"""
+    messages = AppState.get_messages()
+    for msg in messages:
+        with st.chat_message(msg["role"]):
+            content = msg["content"]
+            
+            # 1. 尝试解析我们在前端流式结束后存入的 ```thought 格式
+            if "```thought\n" in content:
+                parts = content.split("```thought\n")
+                if parts[0].strip():
+                    st.markdown(parts[0])
+                
+                for part in parts[1:]:
+                    if "\n```\n" in part:
+                        thought, rest = part.split("\n```\n", 1)
+                        with st.expander("🤔 思考过程", expanded=False):
+                            st.markdown(thought)
+                        if rest.strip():
+                            st.markdown(rest)
+                    else:
+                        st.markdown("```thought\n" + part)
+                        
+            # 2. 尝试解析从后端原始加载的历史记录中包含的 <think> 标签
+            elif "<think>" in content and "</think>" in content:
+                # 提取思考内容和剩余正文
+                thought_match = re.search(r'<think>(.*?)</think>', content, re.DOTALL)
+                if thought_match:
+                    thought = thought_match.group(1).strip()
+                    rest = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL).strip()
+                    
+                    with st.expander("🤔 思考过程", expanded=False):
+                        st.markdown(thought)
+                    if rest:
+                        st.markdown(rest)
+                else:
+                    st.markdown(content)
+            else:
+                st.markdown(content)
 
 
-def _render_input_box():
-    """渲染输入框和流式响应处理"""
+def _render_input_and_response():
+    """渲染输入框并处理用户输入与AI响应"""
     if prompt := st.chat_input("请输入您的问题...", key="chat_input"):
-        _handle_user_message(prompt)
-
-
-def _handle_user_message(prompt: str):
-    """
-    处理用户消息
-    
-    Args:
-        prompt: 用户输入的消息
-    """
-    # 显示用户消息
-    with st.chat_message("user"):
-        st.markdown(prompt)
-    AppState.add_message("user", prompt)
-    
-    # 流式调用 AI 回复
-    _handle_ai_response()
+        # 显示用户消息
+        with st.chat_message("user"):
+            st.markdown(prompt)
+        AppState.add_message("user", prompt)
+        
+        # 流式调用 AI 回复
+        _handle_ai_response()
 
 
 def _handle_ai_response():
-    """处理 AI 流式响应"""
+    """处理 AI 流式响应 (适配 LangGraph v2 事件格式)"""
     with st.chat_message("assistant"):
+        # Placeholder for the reasoning trace; left empty unless the stream carries reasoning_token values or <think> tags
+        thought_placeholder = st.empty()
         message_placeholder = st.empty()
         tool_status_placeholder = st.empty()
-        full_response = ""
+        
+        raw_text = ""
+        api_thought = ""
+        display_text = ""
+        display_thought = ""
         
         # 调用流式 API
         stream = api_client.chat_stream(
@@ -99,38 +128,163 @@ def _handle_ai_response():
             user_id=AppState.get_user_id()
         )
         
-        # 消费流式响应
+        # 消费流式响应 (v2 格式)
         for event in stream:
             event_type = event.get("type")
             
-            if event_type == "token":
-                # 逐字输出
-                full_response += event.get("content", "")
-                message_placeholder.markdown(full_response + "▌")
+            # [DEBUG] Log every received llm_token event (logging.debug: only shown when the log level is DEBUG)
+            import logging
+            if event_type == "llm_token":
+                logging.debug(f"[Frontend Stream] token: {repr(event.get('token'))}, reasoning: {repr(event.get('reasoning_token'))}")
             
-            elif event_type == "tool_start":
-                # 工具调用开始
-                tool_name = event.get("tool", "")
-                tool_status_placeholder.info(f"🔧 调用工具: {tool_name}...")
+            # 1. LLM token stream (typewriter effect)
+            if event_type == "llm_token":
+                # Only render tokens emitted by the "llm_call" node so tool output is never shown as model text
+                if event.get("node") == "llm_call":
+                    # Use `or ""` rather than a .get() default: a key present with value None would otherwise become the text "None"
+                    token = str(event.get("token") or "")
+                    reasoning_token = str(event.get("reasoning_token") or "")
+                    if reasoning_token:
+                        api_thought += reasoning_token
+                    if token:
+                        raw_text += token
+
+                    display_thought = api_thought
+                    display_text = raw_text
+                    is_thinking = False
+
+                    # Mode 1: reasoning delivered out-of-band via reasoning_token (e.g. DeepSeek-Reasoner)
+                    if api_thought:
+                        is_thinking = not bool(raw_text.strip())
+
+                    # Mode 2: reasoning embedded in the text inside <think> tags (e.g. Gemma, local DeepSeek)
+                    if "<think>" in raw_text:
+                        think_match = re.search(r'<think>(.*?)(</think>|$)', raw_text, re.DOTALL)
+                        if think_match:
+                            display_thought = think_match.group(1).strip()
+                            is_thinking = "</think>" not in raw_text
+
+                            # The body is whatever remains once the <think>...</think> block is removed.
+                            # While </think> has not arrived yet, drop everything from <think> to the end.
+                            if is_thinking:
+                                display_text = re.sub(r'<think>.*$', '', raw_text, flags=re.DOTALL).strip()
+                            else:
+                                display_text = re.sub(r'<think>.*?</think>', '', raw_text, flags=re.DOTALL).strip()
+                    elif raw_text and "<think>".startswith(raw_text):
+                        # raw_text is so far only a proper prefix of "<think>" (e.g. "<", "<t", "<thin"):
+                        # the opening tag is still being streamed, so the full-tag branch above cannot
+                        # match yet. Hide the fragment instead of rendering it as body text; the state
+                        # is recomputed from raw_text on the next token, so nothing is lost.
+                        display_text = ""
+                        is_thinking = True
+
+                    # Render the reasoning trace
+                    if display_thought:
+                        # Rendered through the st.empty placeholder to avoid flicker and nested containers.
+                        # Original author's note: an expander's expanded state cannot be toggled mid-stream,
+                        # so an info box stands in for it until the stream has finished.
+                        if is_thinking:
+                            # Still reasoning: show the text directly with a cursor, no expander
+                            thought_placeholder.info(f"**🤔 思考过程 (正在思考...)**\n\n{display_thought}▌")
+                        else:
+                            # Reasoning finished: keep the trace visible without the cursor (swapped for an expander after the stream ends)
+                            thought_placeholder.info(f"**🤔 思考过程**\n\n{display_thought}")
+
+                    # Render the answer body
+                    if display_text or not is_thinking:
+                        cursor = "▌" if not is_thinking else ""
+                        message_placeholder.markdown(display_text + cursor)
             
-            elif event_type == "tool_end":
-                # 工具调用完成
-                tool_name = event.get("tool", "")
-                tool_status_placeholder.success(f"✅ 工具 {tool_name} 完成")
-                tool_status_placeholder.empty()
+            # 2. 处理状态更新 (节点完成、工具结果等)
+            elif event_type == "state_update":
+                # state_update 可能包含多种数据，常见的是 messages 更新
+                data = event.get("data", {})
+                messages_update = event.get("messages", [])
+                
+                if not messages_update and isinstance(data, dict):
+                    for node_name, node_data in data.items():
+                        if isinstance(node_data, dict) and "messages" in node_data:
+                            messages_update = node_data["messages"]
+                
+                # 如果更新中包含 messages，说明某个节点输出了完整消息
+                # 但我们已经在用 token 流构建回复，这里可以用来检测工具调用结果
+                if messages_update:
+                    # 检查最后一条消息是否来自工具
+                    last_msg = messages_update[-1] if messages_update else {}
+                    if isinstance(last_msg, dict) and last_msg.get("role") == "tool":
+                        tool_name = last_msg.get("name", "unknown")
+                        tool_status_placeholder.success(f"✅ 工具 {tool_name} 执行完成")
+                        # 短暂显示后清除，保持界面清爽
+                        import time
+                        time.sleep(0.5)
+                        tool_status_placeholder.empty()
             
-            elif event_type == "done":
-                # 对话完成
-                _show_completion_stats(event)
+            # 3. 处理自定义事件 (你在后端通过 get_stream_writer 发送的)
+            elif event_type == "custom":
+                custom_data = event.get("data", {})
+                # 检查是否是完成事件
+                if custom_data.get("type") == "done":
+                    _show_completion_stats(custom_data)
+                # 其他自定义事件，比如工具调用状态
+                elif "type" in custom_data:
+                    custom_type = custom_data["type"]
+                    if custom_type == "tool_start":
+                        tool_name = custom_data.get("tool", "unknown")
+                        tool_status_placeholder.info(f"🔧 调用工具: {tool_name}...")
+                    elif custom_type == "tool_end":
+                        tool_name = custom_data.get("tool", "unknown")
+                        tool_status_placeholder.success(f"✅ 工具 {tool_name} 完成")
+                        tool_status_placeholder.empty()
+                    elif "status" in custom_data:
+                        status_msg = custom_data.get("status", "")
+                        tool_status_placeholder.info(f"🔧 {status_msg}")
             
+            # 4. 处理错误
             elif event_type == "error":
-                # 错误处理
                 st.error(f"❌ 错误: {event.get('message', '未知错误')}")
+                break  # 发生错误时停止处理
+            
+            # 注意：v2 格式中没有固定的 "done" 事件，流结束即代表完成
+            # 统计信息 (token_usage, elapsed_time) 通常会在最后的 state_update 中携带
+            # 如果后端在最终状态里返回了这些信息，可以在此处理
         
-        # 显示完整响应
-        message_placeholder.markdown(full_response)
-        AppState.add_message("assistant", full_response)
+        # 流结束后，移除光标并保存完整回复
+        display_text = raw_text
+        display_thought = api_thought
+        
+        # 最后的标签清理，以防未闭合
+        if "<think>" in raw_text:
+            think_match = re.search(r'<think>(.*?)(</think>|$)', raw_text, re.DOTALL)
+            if think_match:
+                display_thought = think_match.group(1).strip()
+                display_text = re.sub(r'<think>.*?(</think>|$)', '', raw_text, flags=re.DOTALL).strip()
+                
+        if display_thought:
+            # 只有在最终结束时，才把它放进折叠面板
+            with thought_placeholder.container():
+                with st.expander("🤔 思考过程", expanded=False):
+                    st.markdown(display_thought)
+        else:
+            thought_placeholder.empty()
+                    
+        # 移除光标
+        message_placeholder.markdown(display_text)
+        
+        # 拼装包含思考过程的完整内容，以便后续在历史中正确渲染
+        final_content = display_text
+        if display_thought:
+            final_content = f"```thought\n{display_thought}\n```\n\n" + display_text
+            
+        AppState.add_message("assistant", final_content)
         tool_status_placeholder.empty()
+        
+        # 消息发送完毕后，静默刷新历史记录列表
+        # （因为可能生成了新对话，或者旧对话摘要已更新）
+        from frontend.components.sidebar import _refresh_threads
+        _refresh_threads()
+        
+        # 强制重绘页面，使侧边栏立即显示最新记录
+        st.rerun()
 
 
 def _show_completion_stats(event: dict):
diff --git a/frontend/components/info_panel.py b/frontend/components/info_panel.py
index 7540cc4..0061551 100644
--- a/frontend/components/info_panel.py
+++ b/frontend/components/info_panel.py
@@ -10,50 +10,30 @@ from frontend.state import AppState
 
 
 def render_info_panel():
-    """渲染右侧信息面板"""
-    st.header("📊 会话信息")
-    
-    # 当前线程信息
-    _render_thread_info()
-    
-    st.divider()
+    """渲染右侧信息面板（现改为侧边栏底部）"""
+    st.caption("📊 会话信息")
     
     # 消息统计
     _render_message_stats()
     
-    st.divider()
-    
     # 使用提示
     _render_tips()
 
 
-def _render_thread_info():
-    """渲染当前线程信息"""
-    st.subheader("当前对话")
-    thread_id = AppState.get_current_thread_id()
-    st.code(thread_id[:8] + "...", language=None)
-
-
 def _render_message_stats():
     """渲染消息统计"""
-    st.subheader("消息统计")
-    
     stats = AppState.get_message_stats()
-    
-    col1, col2 = st.columns(2)
-    with col1:
-        st.metric("用户消息", stats["user"])
-    with col2:
-        st.metric("AI 回复", stats["assistant"])
+    st.markdown(f"<span style='font-size:0.8em;color:#666;'>共 {stats['user']} 问 / {stats['assistant']} 答</span>", unsafe_allow_html=True)
 
 
 def _render_tips():
     """渲染使用提示"""
-    st.subheader("💡 使用提示")
-    st.markdown("""
-    - 左侧可切换历史对话
-    - 点击"新对话"开始新话题
-    - 登录后对话历史隔离
-    - 支持流式实时响应
-    - 模型可随时切换
-    """)
+    with st.expander("💡 使用提示", expanded=False):
+        st.markdown("""
+        <div style='font-size:0.85em;color:#555;'>
+        - 左侧可切换历史对话<br>
+        - 点击"新对话"开始新话题<br>
+        - 登录后对话历史隔离<br>
+        - 模型可随时切换
+        </div>
+        """, unsafe_allow_html=True)
diff --git a/frontend/components/sidebar.py b/frontend/components/sidebar.py
index fb57c21..c23aa3e 100644
--- a/frontend/components/sidebar.py
+++ b/frontend/components/sidebar.py
@@ -14,14 +14,22 @@ from frontend.config import config
 
 def render_sidebar():
     """渲染左侧栏"""
-    _render_user_section()
+    # 顶部放置新对话按钮，像 ChatGPT/DeepSeek 一样显眼
+    _render_history_actions()
     st.divider()
+    
+    # 历史列表
     _render_history_section()
+    
+    # 底部放用户部分
+    st.divider()
+    _render_user_section()
 
 
 def _render_user_section():
     """渲染用户登录区域"""
-    st.header("👤 用户")
+    # st.header("👤 用户") # 移除显眼的标题，改用更柔和的 caption
+    st.caption("👤 用户管理")
     
     if not AppState.is_logged_in():
         _render_login_form()
@@ -32,58 +40,62 @@ def _render_user_section():
 def _render_login_form():
     """渲染登录表单"""
     username = st.text_input(
-        "输入用户名（可选）",
+        "用户名",
         key="login_input",
-        placeholder="留空使用默认用户",
-        help="未登录将使用 default_user，可能导致对话污染"
+        placeholder="输入用户名...",
+        help="未登录将使用 default_user，可能导致对话污染",
+        label_visibility="collapsed"
     )
     
-    if st.button("✅ 进入", type="primary", use_container_width=True):
+    if st.button("进入", type="secondary", use_container_width=True):
         AppState.login(username)
         _refresh_threads()
         st.rerun()
     
-    st.info("💡 建议登录以隔离对话历史")
+    # st.info("💡 建议登录以隔离对话历史") # 移除多余色块
 
 
 def _render_user_info():
     """渲染用户信息"""
-    st.success(f"✅ 当前用户: `{AppState.get_user_id()}`")
+    st.markdown(f"**当前用户**: `{AppState.get_user_id()}`")
     
-    if st.button("🔄 切换用户", use_container_width=True):
+    if st.button("切换用户", type="secondary", use_container_width=True):
         AppState.logout()
+        _refresh_threads()
         st.rerun()
 
 
 def _render_history_section():
     """渲染历史对话列表"""
-    st.header("📚 对话历史")
+    col1, col2 = st.columns([3, 1])
+    with col1:
+        st.caption("📚 对话历史")
+    with col2:
+        if st.button("🔄", help="刷新列表", key="refresh_history_btn"):
+            _refresh_threads()
     
-    # 操作按钮
-    _render_history_actions()
-    
-    st.divider()
-    
-    # 历史列表
     _render_thread_list()
 
 
 def _render_history_actions():
     """渲染历史操作按钮"""
-    if st.button("🔄 刷新列表", use_container_width=True):
-        _refresh_threads()
-    
-    if st.button("➕ 新对话", type="primary", use_container_width=True):
+    # 移除了 type="primary"，让它变成普通的线框按钮，不再是大红块
+    if st.button("➕ 新对话", use_container_width=True):
         AppState.start_new_thread()
         st.rerun()
 
 
 def _render_thread_list():
     """渲染线程列表"""
+    # 仅在初次加载时拉取，或由外部主动调用 _refresh_threads() 更新
+    if "threads_loaded" not in st.session_state:
+        _refresh_threads()
+        st.session_state.threads_loaded = True
+        
     threads = AppState.get_threads()
     
     if not threads:
-        st.info("暂无对话历史")
+        st.caption("暂无对话历史")
         return
     
     for thread in threads:
@@ -98,28 +110,23 @@ def _render_thread_item(thread: dict):
         thread: 线程信息字典
     """
     thread_id = thread["thread_id"]
-    summary = thread.get("summary", "空对话")
-    message_count = thread.get("message_count", 0)
-    last_updated = thread.get("last_updated", "")
-    
-    # 格式化时间
-    time_str = _format_time(last_updated)
+    summary = thread.get("summary", "新对话")
     
     # 判断是否为当前线程
     is_current = thread_id == AppState.get_current_thread_id()
-    button_type = "primary" if is_current else "secondary"
     
-    # 截断摘要
-    summary_display = summary[:config.summary_max_length]
-    if len(summary) > config.summary_max_length:
-        summary_display += "..."
+    # 根据是否当前线程改变按钮样式
+    btn_type = "primary" if is_current else "tertiary"
+    
+    # 为了避免内容过长，截断摘要
+    display_text = summary[:15] + "..." if len(summary) > 15 else summary
     
-    # 渲染按钮
     if st.button(
-        f"💬 {summary_display}\n\n🕐 {time_str} | {message_count}条",
+        display_text,
         key=f"thread_{thread_id}",
+        help=f"完整摘要: {summary}",
         use_container_width=True,
-        type=button_type
+        type=btn_type
     ):
         _load_thread(thread_id)
 
diff --git a/frontend/config.py b/frontend/config.py
index 1c7656f..b9b8c9d 100644
--- a/frontend/config.py
+++ b/frontend/config.py
@@ -24,7 +24,7 @@ class FrontendConfig:
     layout: str = "wide"
     
     # ==================== 模型配置 ====================
-    default_model: str = "zhipu"
+    default_model: str = "local"  # 更改为local作为默认模型
     model_options: dict = None
     
     # ==================== 用户配置 ====================
@@ -41,9 +41,9 @@ class FrontendConfig:
         """初始化后处理 - 设置默认值和加载环境变量"""
         if self.model_options is None:
             self.model_options = {
-                "zhipu": "智谱 GLM-4.7-Flash（在线）",
-                "deepseek": "DeepSeek V3.2（在线）",
-                "local": "本地 llama.cpp（Gemma-4）"
+                "local": "本地 llama.cpp（Gemma-4）",      # 本地模型作为第一个
+                "deepseek": "DeepSeek V3.2（在线）",      # DeepSeek 作为中间
+                "zhipu": "智谱 GLM-4.7-Flash（在线）"     # GLM-4.7 作为最后一个
             }
         
         # 从环境变量加载配置
@@ -53,9 +53,9 @@ class FrontendConfig:
         """从环境变量加载配置（优先级最高）"""
         # API 地址（移除 /chat 后缀）
         # 优先级：环境变量 API_URL > 默认值
-        api_url = os.getenv("API_URL", "http://localhost:8083")
+        api_url = os.getenv("API_URL", "http://127.0.0.1:8083")
         self.api_base = api_url.replace("/chat", "").rstrip("/")
 
 
 # 全局配置实例（单例模式）
-config = FrontendConfig()
+config = FrontendConfig()
\ No newline at end of file
diff --git a/frontend/frontend.py b/frontend/frontend.py
deleted file mode 100644
index 8d70c6f..0000000
--- a/frontend/frontend.py
+++ /dev/null
@@ -1,409 +0,0 @@
-"""
-右侧栏组件：工具状态和统计信息
-"""
-import streamlit as st
-
-
-def render_info_panel():
-    st.header("📊 会话信息")
-    
-    # 当前线程信息
-    st.subheader("当前对话")
-    st.code(st.session_state.current_thread_id[:8] + "...", language=None)
-    
-    st.divider()
-    
-    # 消息统计
-    st.subheader("消息统计")
-    user_msgs = len([m for m in st.session_state.messages if m["role"] == "user"])
-    assistant_msgs = len([m for m in st.session_state.messages if m["role"] == "assistant"])
-    
-    st.metric("用户消息", user_msgs)
-    st.metric("AI 回复", assistant_msgs)
-    
-    st.divider()
-    
-    # 使用提示
-    st.subheader("💡 使用提示")
-    st.markdown("""
-    - 左侧可切换历史对话
-    - 点击"新对话"开始新话题
-    - 登录后对话历史隔离
-    - 支持流式实时响应
-    - 模型可随时切换
-    """)
-"""
-中间栏组件：聊天区域
-"""
-import streamlit as st
-from ..config import config
-from ..api_client import stream_chat
-
-
-def render_chat_area():
-    # 模型选择器
-    col_model, col_empty = st.columns([2, 3])
-    with col_model:
-        selected_model_key = st.selectbox(
-            "🧠 选择模型",
-            options=list(config.model_options.keys()),
-            format_func=lambda x: config.model_options[x],
-            index=list(config.model_options.keys()).index(st.session_state.selected_model) if st.session_state.selected_model in config.model_options else 0
-        )
-        st.session_state.selected_model = selected_model_key
-    
-    st.divider()
-    
-    # 显示消息历史
-    chat_container = st.container(height=500)
-    with chat_container:
-        for msg in st.session_state.messages:
-            with st.chat_message(msg["role"]):
-                st.markdown(msg["content"])
-    
-    # 输入框
-    if prompt := st.chat_input("请输入您的问题...", key="chat_input"):
-        # 显示用户消息
-        with st.chat_message("user"):
-            st.markdown(prompt)
-        st.session_state.messages.append({"role": "user", "content": prompt})
-        
-        # 流式调用后端
-        with st.chat_message("assistant"):
-            message_placeholder = st.empty()
-            tool_status_placeholder = st.empty()
-            full_response = ""
-            
-            stream_gen = stream_chat(
-                message=prompt,
-                thread_id=st.session_state.current_thread_id,
-                model=st.session_state.selected_model,
-                user_id=st.session_state.user_id
-            )
-            
-            if stream_gen:
-                for data in stream_gen:
-                    if data["type"] == "token":
-                        full_response += data["content"]
-                        message_placeholder.markdown(full_response + "▌")
-                    
-                    elif data["type"] == "tool_start":
-                        tool_status_placeholder.info(f"🔧 调用工具: {data['tool']}...")
-                    
-                    elif data["type"] == "tool_end":
-                        tool_status_placeholder.success(f"✅ 工具 {data['tool']} 完成")
-                        tool_status_placeholder.empty()
-                    
-                    elif data["type"] == "done":
-                        # 最终响应
-                        token_usage = data.get("token_usage", {})
-                        elapsed = data.get("elapsed_time", 0)
-                        if token_usage:
-                            st.caption(f"📊 消耗 {token_usage.get('total_tokens', 0)} tokens | ⏱️ {elapsed:.2f}s")
-                    
-                    elif data["type"] == "error":
-                        st.error(f"❌ 错误: {data['message']}")
-                
-                # 显示完整响应
-                message_placeholder.markdown(full_response)
-                st.session_state.messages.append({"role": "assistant", "content": full_response})
-                tool_status_placeholder.empty()
-"""
-左侧栏组件：用户登录 + 历史对话列表
-"""
-from datetime import datetime
-import streamlit as st
-from ..state import AppState
-from ..api_client import refresh_threads, load_thread_history
-
-
-def render_sidebar():
-    st.header("👤 用户")
-    
-    # 用户登录区域
-    if not st.session_state.logged_in:
-        username = st.text_input(
-            "输入用户名（可选）",
-            key="login_input",
-            placeholder="留空使用默认用户",
-            help="未登录将使用 default_user，可能导致对话污染"
-        )
-        
-        if st.button("✅ 进入", type="primary", use_container_width=True):
-            AppState.login(username)
-            refresh_threads(st.session_state.user_id)
-        
-        st.info("💡 建议登录以隔离对话历史")
-    else:
-        st.success(f"✅ 当前用户: `{st.session_state.user_id}`")
-        
-        if st.button("🔄 切换用户", use_container_width=True):
-            AppState.reset_login()
-    
-    st.divider()
-    
-    # 历史对话列表
-    st.header("📚 对话历史")
-    
-    # 刷新按钮
-    if st.button("🔄 刷新列表", use_container_width=True):
-        refresh_threads(st.session_state.user_id)
-    
-    # 新对话按钮
-    if st.button("➕ 新对话", type="primary", use_container_width=True):
-        AppState.start_new_thread()
-    
-    st.divider()
-    
-    # 显示历史列表
-    if st.session_state.threads:
-        for thread in st.session_state.threads:
-            thread_id = thread["thread_id"]
-            summary = thread.get("summary", "空对话")
-            message_count = thread.get("message_count", 0)
-            last_updated = thread.get("last_updated", "")
-            
-            # 格式化时间
-            if last_updated:
-                try:
-                    dt = datetime.fromisoformat(last_updated.replace("Z", "+00:00"))
-                    time_str = dt.strftime("%m-%d %H:%M")
-                except:
-                    time_str = last_updated[:10]
-            else:
-                time_str = "未知"
-            
-            # 按钮样式
-            is_current = thread_id == st.session_state.current_thread_id
-            button_type = "primary" if is_current else "secondary"
-            
-            if st.button(
-                f"💬 {summary[:30]}{'...' if len(summary) > 30 else ''}\n\n🕐 {time_str} | {message_count}条",
-                key=f"thread_{thread_id}",
-                use_container_width=True,
-                type=button_type
-            ):
-                load_thread_history(thread_id, st.session_state.user_id)
-    else:
-        st.info("暂无对话历史")
-# Components package
-"""
-后端 API 客户端封装
-"""
-import json
-import requests
-import streamlit as st
-from .config import config
-
-
-def refresh_threads(user_id: str):
-    """刷新用户的历史对话列表"""
-    try:
-        resp = requests.get(
-            f"{config.api_base}/threads",
-            params={"user_id": user_id, "limit": 50},
-            timeout=10
-        )
-        if resp.status_code == 200:
-            st.session_state.threads = resp.json()["threads"]
-        else:
-            st.error(f"加载历史列表失败: HTTP {resp.status_code}")
-    except Exception as e:
-        st.error(f"加载历史列表失败: {e}")
-
-
-def load_thread_history(thread_id: str, user_id: str):
-    """加载指定线程的完整消息历史"""
-    try:
-        resp = requests.get(
-            f"{config.api_base}/thread/{thread_id}/messages",
-            params={"user_id": user_id},
-            timeout=10
-        )
-        if resp.status_code == 200:
-            st.session_state.messages = resp.json()["messages"]
-            st.session_state.current_thread_id = thread_id
-            st.rerun()
-        else:
-            st.error(f"加载对话失败: HTTP {resp.status_code}")
-    except Exception as e:
-        st.error(f"加载对话失败: {e}")
-
-
-def stream_chat(message: str, thread_id: str, model: str, user_id: str):
-    """流式调用后端聊天接口"""
-    payload = {
-        "message": message,
-        "thread_id": thread_id,
-        "model": model,
-        "user_id": user_id
-    }
-    
-    try:
-        with requests.post(
-            f"{config.api_base}/chat/stream",
-            json=payload,
-            stream=True,
-            timeout=120
-        ) as response:
-            if response.status_code != 200:
-                st.error(f"请求失败: HTTP {response.status_code}")
-                return None
-            
-            full_response = ""
-            for line in response.iter_lines():
-                if line:
-                    line = line.decode('utf-8')
-                    if line.startswith("data: "):
-                        data_str = line[6:]
-                        if data_str == "[DONE]":
-                            break
-                        try:
-                            data = json.loads(data_str)
-                            yield data
-                        except json.JSONDecodeError:
-                            pass
-            return full_response
-            
-    except Exception as e:
-        st.error(f"请求失败: {e}")
-        return None
-"""
-Session State 管理
-"""
-import uuid
-import streamlit as st
-
-
-class AppState:
-    """管理 Streamlit Session State"""
-
-    @staticmethod
-    def init():
-        """初始化必要的 session state 变量"""
-        if "user_id" not in st.session_state:
-            st.session_state.user_id = "default_user"
-        if "logged_in" not in st.session_state:
-            st.session_state.logged_in = False
-        if "threads" not in st.session_state:
-            st.session_state.threads = []
-        if "current_thread_id" not in st.session_state:
-            st.session_state.current_thread_id = str(uuid.uuid4())
-        if "messages" not in st.session_state:
-            st.session_state.messages = []
-        if "selected_model" not in st.session_state:
-            st.session_state.selected_model = "zhipu"
-        if "loading_history" not in st.session_state:
-            st.session_state.loading_history = False
-
-    @staticmethod
-    def reset_login():
-        """重置登录状态"""
-        st.session_state.logged_in = False
-        st.session_state.user_id = "default_user"
-        st.session_state.threads = []
-        st.rerun()
-
-    @staticmethod
-    def login(username: str):
-        """执行登录"""
-        st.session_state.user_id = username.strip() if username.strip() else "default_user"
-        st.session_state.logged_in = True
-        st.rerun()
-
-    @staticmethod
-    def start_new_thread():
-        """开始新对话"""
-        st.session_state.current_thread_id = str(uuid.uuid4())
-        st.session_state.messages = []
-        st.rerun()
-"""
-应用配置
-"""
-import os
-from dataclasses import dataclass
-
-
-@dataclass
-class AppConfig:
-    page_title: str = "AI 个人助手"
-    page_icon: str = "🤖"
-    layout: str = "wide"
-    # 后端 API 地址配置
-    # 优先级：环境变量 API_URL > Docker 内部服务名 > 本地开发地址
-    api_base: str = os.getenv("API_URL", "http://localhost:8001").replace("/chat", "")
-    
-    model_options: dict = None
-
-    def __post_init__(self):
-        if self.model_options is None:
-            self.model_options = {
-                "zhipu": "智谱 GLM-4.7-Flash（在线）",
-                "deepseek": "DeepSeek V3.2（在线）",
-                "local": "本地 vLLM（Gemma-4）"
-            }
-
-config = AppConfig()
-"""
-AI Agent 前端主入口
-采用模块化架构，仅负责组装各组件
-"""
-
-import sys
-import os
-
-# 添加项目根目录到 Python 路径，支持绝对导入
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-import streamlit as st
-
-# 使用绝对导入
-from frontend.config import config
-from frontend.state import AppState
-from frontend.components.sidebar import render_sidebar
-from frontend.components.chat_area import render_chat_area
-from frontend.components.info_panel import render_info_panel
-
-
-# =============================================================================
-# 页面配置
-# =============================================================================
-st.set_page_config(
-    page_title=config.page_title,
-    page_icon=config.page_icon,
-    layout=config.layout
-)
-
-
-# =============================================================================
-# 初始化状态
-# =============================================================================
-AppState.init()
-
-
-# =============================================================================
-# 主界面
-# =============================================================================
-def main():
-    """主界面渲染 - 三栏布局"""
-    # 标题
-    st.title("🤖 个人生活与数据分析助手")
-    
-    # 三栏布局：左侧栏(1) + 中间栏(3) + 右侧栏(1)
-    col_sidebar, col_chat, col_info = st.columns([1, 3, 1])
-    
-    # 左侧栏：用户登录 + 历史对话
-    with col_sidebar:
-        render_sidebar()
-    
-    # 中间栏：模型选择 + 聊天区域 + 输入框
-    with col_chat:
-        render_chat_area()
-    
-    # 右侧栏：会话信息 + 统计 + 使用提示
-    with col_info:
-        render_info_panel()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/frontend/frontend_main.py b/frontend/frontend_main.py
new file mode 100644
index 0000000..1a38f89
--- /dev/null
+++ b/frontend/frontend_main.py
@@ -0,0 +1,125 @@
+"""
+Main entry point of the AI Agent frontend.
+Modular architecture: this file only assembles the individual components.
+"""
+
+import sys
+import os
+
+# Add the project root to the Python path so the absolute imports below work
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import streamlit as st
+
+# Use absolute imports
+from frontend.config import config
+from frontend.state import AppState
+from frontend.components.sidebar import render_sidebar
+from frontend.components.chat_area import render_chat_area
+from frontend.components.info_panel import render_info_panel
+
+
+# =============================================================================
+# Page configuration
+# =============================================================================
+st.set_page_config(
+    page_title=config.page_title,
+    page_icon=config.page_icon,
+    layout=config.layout
+)
+
+
+# =============================================================================
+# Initialize state
+# =============================================================================
+AppState.init()
+
+def apply_custom_css():
+    """Inject a <style> block via st.markdown (unsafe_allow_html=True) restyling container padding, sidebar, hr, buttons, chat input and chat messages."""
+    st.markdown("""
+        <style>
+        /* 移除顶部默认空白 */
+        .block-container {
+            padding-top: 2rem !important;
+            padding-bottom: 2rem !important;
+        }
+        
+        /* 侧边栏样式优化：降低背景色对比度，稍微暗一点提高区分度 */
+        [data-testid="stSidebar"] {
+            background-color: #f0f2f5 !important;
+            border-right: 1px solid #e1e4e8;
+        }
+        
+        /* 隐藏标题和头像边框的粗重线条 */
+        hr {
+            margin: 1em 0;
+            border-color: #eee;
+        }
+        
+        /* 自定义按钮样式：去除强烈的背景色，使用浅色线框或扁平风 */
+        .stButton>button {
+            border-radius: 8px;
+            font-weight: 500;
+        }
+        
+        /* 覆盖 Primary 按钮默认的刺眼大红色，改为柔和的深色高亮 */
+        .stButton>button[kind="primary"] {
+            background-color: #e5e7eb !important;
+            color: #1f2937 !important;
+            border: 1px solid #d1d5db !important;
+        }
+        
+        /* 覆盖 Primary 按钮悬停效果 */
+        .stButton>button[kind="primary"]:hover {
+            background-color: #d1d5db !important;
+            border-color: #9ca3af !important;
+            color: #111827 !important;
+        }
+        
+        /* 普通按钮悬停效果 */
+        .stButton>button:hover {
+            border-color: #9ca3af;
+            color: #1f2937;
+            background-color: #f9fafb;
+        }
+        
+        /* 聊天输入框美化 */
+        [data-testid="stChatInput"] {
+            border-radius: 12px;
+            border: 1px solid #e0e0e0;
+            box-shadow: 0 2px 10px rgba(0,0,0,0.03);
+        }
+        
+        /* 用户和 AI 的头像调整 */
+        .stChatMessage {
+            padding: 1rem 0;
+            border-bottom: 1px solid #f8f8f8;
+        }
+        </style>
+    """, unsafe_allow_html=True)
+
+
+# =============================================================================
+# 主界面
+# =============================================================================
+def main():
+    """Render the main page: a minimalist wide-screen layout (sidebar plus full-width chat)."""
+    # Apply the custom CSS first
+    apply_custom_css()
+    
+    # Top heading (optional; dropping it would make the page look more like a plain chat window)
+    st.markdown("<h3 style='text-align: center; font-weight: 400; color: #555; margin-bottom: 2rem;'>个人助手</h3>", unsafe_allow_html=True)
+
+    # Left sidebar: user login and conversation history (original note also lists model selection; not visible here, confirm)
+    with st.sidebar:
+        render_sidebar()
+        # The info panel that used to sit in the right-hand column now renders at the bottom of the sidebar
+        st.divider()
+        render_info_panel()
+
+    # Main area: the chat area, rendered outside any column so it spans the full width
+    render_chat_area()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/frontend/state.py b/frontend/state.py
index 5efa26f..e1d32bb 100644
--- a/frontend/state.py
+++ b/frontend/state.py
@@ -60,6 +60,8 @@ class AppState:
         """
         st.session_state.user_id = username.strip() if username.strip() else config.default_user_id
         st.session_state.logged_in = True
+        # 登录后必须开启一个干净的新对话
+        AppState.start_new_thread()
     
     @staticmethod
     def logout():
@@ -67,6 +69,8 @@ class AppState:
         st.session_state.logged_in = False
         st.session_state.user_id = config.default_user_id
         st.session_state.threads = []
+        # 登出后必须开启一个干净的新对话
+        AppState.start_new_thread()
     
     # ==================== 线程相关 ====================
     
diff --git a/scripts/start.sh b/scripts/start.sh
index d3387d2..6ee87f5 100755
--- a/scripts/start.sh
+++ b/scripts/start.sh
@@ -81,11 +81,11 @@ check_config() {
         check_fail "ZHIPUAI_API_KEY 未配置或格式错误"
     fi
     
-    # 检查 VLLM_LOCAL_KEY
-    if grep -q "^VLLM_LOCAL_KEY=" "$PROJECT_DIR/.env" 2>/dev/null; then
-        check_pass "VLLM_LOCAL_KEY 已配置"
+    # 检查 LLAMACPP_API_KEY
+    if grep -q "^LLAMACPP_API_KEY=" "$PROJECT_DIR/.env" 2>/dev/null; then
+        check_pass "LLAMACPP_API_KEY 已配置"
     else
-        check_warn "VLLM_LOCAL_KEY 未配置（如不使用本地模型可忽略）"
+        check_warn "LLAMACPP_API_KEY 未配置（如不使用本地模型可忽略）"
     fi
     
     # 检查 DB_URI (远程服务器)
@@ -147,8 +147,7 @@ check_config() {
     
     # 测试 PostgreSQL 连接
     if command -v psql &> /dev/null; then
-        # 注意：这里假设密码为 mysecretpassword，如果不同需调整或从 env 读取
-        if PGPASSWORD=mysecretpassword psql -h 115.190.121.151 -U postgres -d langgraph_db -c "SELECT 1;" &> /dev/null; then
+        if PGPASSWORD=huang1998 psql -h 115.190.121.151 -U postgres -d langgraph_db -c "SELECT 1;" &> /dev/null; then
             check_pass "PostgreSQL 远程连接正常 (115.190.121.151:5432)"
         else
             check_fail "PostgreSQL 远程连接失败"
@@ -306,11 +305,11 @@ start_frontend() {
     set +a
     
     export PYTHONPATH="$PROJECT_DIR"
-    streamlit run frontend/frontend.py &
+    streamlit run frontend/frontend_main.py &
     FRONTEND_PID=$!
     echo -e "${GREEN}✓ 前端服务已启动 (PID: $FRONTEND_PID)${NC}"
     echo -e "${GREEN}✓ 访问地址:${NC}"
-    echo -e "   本地开发: http://localhost:8501"
+    echo -e "   本地开发: http://127.0.0.1:8501"
 }
 
 # =============================================================================
@@ -333,7 +332,7 @@ docker_up() {
     echo -e "\n${GREEN}✓ Docker Compose 服务已启动${NC}"
     echo -e "${BLUE}📊 查看服务状态:${NC} docker compose ps"
     echo -e "${BLUE}📝 查看日志:${NC} docker compose logs -f"
-    echo -e "${BLUE}🌐 访问应用:${NC} http://localhost:8501"
+    echo -e "${BLUE}🌐 访问应用:${NC} http://127.0.0.1:8501"
 }
 
 docker_down() {
@@ -395,6 +394,7 @@ case "${1:-help}" in
         check_llamacpp || start_llamacpp
         check_embedding || start_embedding
         start_backend
+        sleep 3
         start_frontend
         echo -e "\n${GREEN}所有服务正在运行，按 Ctrl+C 停止 Python 服务${NC}"
         echo -e "${YELLOW}注意：Docker 容器会在后台继续运行${NC}"