"""
FastAPI 后端 - 支持动态模型切换,使用 PostgreSQL 持久化记忆
采用依赖注入模式,优雅管理资源生命周期
"""
import uuid
import json
from contextlib import asynccontextmanager

from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect, Depends, Request, Query
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver

from .config import DB_URI, BACKEND_PORT
from .agent.service import AIAgentService
from .agent.history import ThreadHistoryService
from .logger import info, error


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan management: create and inject the global services."""
    # 1. Create the database connection pool and initialize tables (checkpointer only)
    async with AsyncPostgresSaver.from_conn_string(DB_URI) as checkpointer:
        await checkpointer.setup()
        # 2. Build the AI agent service
        agent_service = AIAgentService(checkpointer)
        await agent_service.initialize()
        # 3. Create the history query service
        history_service = ThreadHistoryService(checkpointer)
        # 4. Store the service instances on app.state
        app.state.agent_service = agent_service
        app.state.history_service = history_service
        # Application is running...
        yield
    # 5. Database connections are released automatically on shutdown (handled by async with)
    info("🛑 Application shutting down; database connection pool released")


app = FastAPI(lifespan=lifespan)

# CORS middleware (allow cross-origin requests from the frontend)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
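
# Note: wildcard origins combined with allow_credentials is convenient for local
# development; production deployments usually pin an explicit list of frontend origins.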


# ========== Health check endpoint ==========
@app.get("/health")
async def health_check():
    """Health check endpoint, used by Docker and CI/CD monitoring."""
    return {"status": "ok", "service": "ai-agent-backend"}


# ========== Pydantic models ==========
class ChatRequest(BaseModel):
    message: str
    thread_id: str | None = None
    model: str = "zhipu"
    user_id: str = "default_user"


class ChatResponse(BaseModel):
    reply: str
    thread_id: str
    model_used: str
    input_tokens: int = 0
    output_tokens: int = 0
    total_tokens: int = 0
    elapsed_time: float = 0.0
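
# A minimal request/response pair for reference (values are illustrative only;
# thread_id is generated server-side when omitted):
#   request:  {"message": "Hello", "model": "zhipu"}
#   response: {"reply": "...", "thread_id": "<uuid4>", "model_used": "zhipu",
#              "input_tokens": 12, "output_tokens": 34, "total_tokens": 46, "elapsed_time": 1.2}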


# ========== Dependency injection functions ==========
def get_agent_service(request: Request) -> AIAgentService:
    """Fetch the global AIAgentService instance from app.state."""
    return request.app.state.agent_service


def get_history_service(request: Request) -> ThreadHistoryService:
    """Fetch the global ThreadHistoryService instance from app.state."""
    return request.app.state.history_service


# ========== HTTP endpoints ==========
@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(
    request: ChatRequest,
    agent_service: AIAgentService = Depends(get_agent_service)
):
    """Synchronous chat endpoint with model selection."""
    if not request.message:
        raise HTTPException(status_code=400, detail="message required")
    thread_id = request.thread_id or str(uuid.uuid4())
    result = await agent_service.process_message(
        request.message, thread_id, request.model, request.user_id
    )
    # Extract token usage statistics (field names differ between providers)
    token_usage = result.get("token_usage", {})
    input_tokens = token_usage.get('prompt_tokens', token_usage.get('input_tokens', 0))
    output_tokens = token_usage.get('completion_tokens', token_usage.get('output_tokens', 0))
    elapsed_time = result.get("elapsed_time", 0.0)
    # Fall back to the first registered model when the requested one is unknown
    actual_model = request.model if request.model in agent_service.graphs else next(iter(agent_service.graphs.keys()))
    return ChatResponse(
        reply=result["reply"],
        thread_id=thread_id,
        model_used=actual_model,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        total_tokens=input_tokens + output_tokens,
        elapsed_time=elapsed_time
    )
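
# Illustrative call (assumes the default BACKEND_PORT of 8079):
#   curl -X POST http://localhost:8079/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Hello", "model": "zhipu", "user_id": "default_user"}'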


# ========== History query endpoints ==========
@app.get("/threads")
async def list_threads(
    user_id: str = Query("default_user", description="User ID"),
    limit: int = Query(50, ge=1, le=200, description="Maximum number of results"),
    history_service: ThreadHistoryService = Depends(get_history_service)
):
    """List the current user's conversation threads."""
    threads = await history_service.get_user_threads(user_id, limit)
    return {"threads": threads}


@app.get("/thread/{thread_id}/messages")
async def get_thread_messages(
    thread_id: str,
    user_id: str = Query("default_user", description="User ID"),
    history_service: ThreadHistoryService = Depends(get_history_service)
):
    """Fetch the complete message history of the given thread."""
    messages = await history_service.get_thread_messages(thread_id)
    return {"messages": messages}


@app.get("/thread/{thread_id}/summary")
async def get_thread_summary(
    thread_id: str,
    user_id: str = Query("default_user", description="User ID"),
    history_service: ThreadHistoryService = Depends(get_history_service)
):
    """Fetch summary information for the given thread."""
    summary = await history_service.get_thread_summary(thread_id)
    return summary
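
# Illustrative calls (port and thread id are placeholders):
#   curl "http://localhost:8079/threads?user_id=default_user&limit=20"
#   curl "http://localhost:8079/thread/<thread_id>/messages"
#   curl "http://localhost:8079/thread/<thread_id>/summary"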


# ========== Streaming chat endpoint ==========
@app.post("/chat/stream")
async def chat_stream_endpoint(
    request: ChatRequest,
    agent_service: AIAgentService = Depends(get_agent_service)
):
    """Streaming chat endpoint (SSE)."""
    if not request.message:
        raise HTTPException(status_code=400, detail="message required")
    thread_id = request.thread_id or str(uuid.uuid4())

    async def event_generator():
        try:
            async for chunk in agent_service.process_message_stream(
                request.message, thread_id, request.model, request.user_id
            ):
                yield f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
            yield "data: [DONE]\n\n"
        except Exception as e:
            error(f"Streaming response error: {e}")
            yield f"data: {json.dumps({'type': 'error', 'message': str(e)}, ensure_ascii=False)}\n\n"
            yield "data: [DONE]\n\n"

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",  # disable Nginx buffering
        }
    )
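
# Illustrative SSE consumption (curl -N disables client-side buffering):
#   curl -N -X POST http://localhost:8079/chat/stream \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Hello"}'
# Each chunk arrives as a "data: {...}" line; the stream terminates with "data: [DONE]".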


# ========== WebSocket endpoint (optional) ==========
@app.websocket("/ws")
async def websocket_endpoint(
    websocket: WebSocket,
    agent_service: AIAgentService = Depends(get_agent_service)
):
    await websocket.accept()
    try:
        while True:
            data = await websocket.receive_json()
            message = data.get("message")
            thread_id = data.get("thread_id", str(uuid.uuid4()))
            model = data.get("model", "zhipu")
            user_id = data.get("user_id", "default_user")
            if not message:
                await websocket.send_json({"error": "missing message"})
                continue
            # process_message returns a result dict; send only the reply text
            result = await agent_service.process_message(message, thread_id, model, user_id)
            actual_model = model if model in agent_service.graphs else next(iter(agent_service.graphs.keys()))
            await websocket.send_json({"reply": result["reply"], "thread_id": thread_id, "model_used": actual_model})
    except WebSocketDisconnect:
        pass
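
# Illustrative message exchange over the socket (defaults mirror ChatRequest):
#   client -> {"message": "Hello", "model": "zhipu"}
#   server -> {"reply": "...", "thread_id": "<uuid4>", "model_used": "zhipu"}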


if __name__ == "__main__":
    import uvicorn
    # Port comes from config (BACKEND_PORT, default 8079) to avoid clashing with llama.cpp on port 8081
    port = int(BACKEND_PORT)
    uvicorn.run(app, host="0.0.0.0", port=port)
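
# For development with auto-reload, the uvicorn CLI can target the module path
# instead (assuming this file is importable as app.backend within the package):
#   uvicorn app.backend:app --host 0.0.0.0 --port 8079 --reload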