Files
ailine/backend/app/model_services/chat_services.py
root a5fc9cd5d8
All checks were successful
构建并部署 AI Agent 服务 / deploy (push) Successful in 6m8s
完整的混合路由优化系统
1. 双模型服务 (llm + smallLLM)
   - 增加 get_small_llm_service() 函数
   - 支持智谱/DeepSeek 小模型作为轻量级选项

2. 前置混合路由
   - 规则快速分流(无 LLM,超快速)
   - 轻量级意图分类(smallLLM)
   - 快速路径:fast_chitchat, fast_rag, fast_tool

3. 自动升级机制
   - 快速路径失败 → 自动回到 React 循环
   - SSE 事件增强:intent_classified, path_decision, fast_path_*, escalation

4. 向后兼容
   - build_react_main_graph(use_hybrid_router=True/False)
   - 可选择启用或禁用混合路由

5. 更新 intent.py
   - 支持 use_small_llm 参数
   - 保留原有完整功能供 React 循环使用
2026-05-03 16:45:46 +08:00

358 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
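Generative large-language-model (chat model) service module.

This module provides a unified interface for obtaining chat-model services
and supports several models:
1. Local vLLM service: the local gemma-4-E4B-it model
2. Zhipu AI: the glm-5.1 model
3. DeepSeek: the deepseek-v4-pro model

Main components:
- LocalVLLMChatProvider: provider for the local vLLM service
- ZhipuChatProvider: provider for the Zhipu API service
- DeepSeekChatProvider: provider for the DeepSeek API service
- ZhipuSmallModelProvider / DeepSeekSmallModelProvider: lightweight-model providers
- get_chat_service(): get the default service (with automatic fallback)
- get_all_chat_services(): get every available model service (for multi-model switching)
- get_small_llm_service(): get the lightweight model service (falls back to get_chat_service())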
"""
import logging
from typing import Dict, Callable
from langchain_core.language_models import BaseChatModel
from .base import (
BaseServiceProvider,
FallbackServiceChain,
SingletonServiceManager
)
from app.config import (
VLLM_BASE_URL,
LLM_API_KEY,
ZHIPUAI_API_KEY,
DEEPSEEK_API_KEY
)
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class LocalVLLMChatProvider(BaseServiceProvider[BaseChatModel]):
    """Provider for a chat model served from the endpoint at ``VLLM_BASE_URL``.

    The endpoint is probed with a TCP connect followed by a ``GET`` on its
    ``/v1/models`` route, and the chat model itself is built with
    ``langchain_openai.ChatOpenAI``.
    """

    def __init__(self, model: str = "gemma-4-E4B-it"):
        """Register the provider under the name ``local_vllm_chat``.

        Args:
            model: Model identifier passed to ``ChatOpenAI``.
        """
        super().__init__("local_vllm_chat")
        self._model = model

    def is_available(self) -> bool:
        """Check whether the local vLLM service is reachable and responding.

        The check runs in two stages so that an unreachable host fails fast:
        a TCP connect with a 2 second timeout, then an HTTP ``GET`` of the
        models listing with a 5 second timeout.

        Returns:
            bool: ``True`` only when the models listing answers with HTTP 200;
            ``False`` when the URL is not configured, the host cannot be
            reached, the request fails, or any other error occurs.
        """
        if not VLLM_BASE_URL:
            logger.warning("VLLM_BASE_URL 未配置")
            return False
        try:
            # Imports stay inside the try so that a missing optional
            # dependency marks the provider unavailable instead of raising.
            import socket
            from urllib.parse import urlparse

            import httpx

            parsed_url = urlparse(VLLM_BASE_URL)
            host = parsed_url.hostname
            port = parsed_url.port or (80 if parsed_url.scheme == 'http' else 443)

            # create_connection() treats a missing host as "localhost", so a
            # URL without a hostname must be rejected explicitly.
            if not host:
                logger.warning(
                    f"本地 VLLM 服务无法连接: {host}:{port} - 无法从 VLLM_BASE_URL 解析主机名"
                )
                return False

            # Stage 1: quick TCP probe. create_connection() tries every
            # resolved address (IPv4 and IPv6); the with-block guarantees the
            # socket is closed.
            try:
                with socket.create_connection((host, port), timeout=2.0):
                    pass
            except OSError as e:
                logger.warning(f"本地 VLLM 服务无法连接: {host}:{port} - {e}")
                return False

            # Stage 2: call the models listing endpoint.
            # NOTE(review): if VLLM_BASE_URL already ends with "/v1", appending
            # "/v1/models" would produce ".../v1/v1/models", so only "/models"
            # is appended in that case. Confirm against the deployed config.
            base = VLLM_BASE_URL.rstrip('/')
            if base.endswith("/v1"):
                models_url = f"{base}/models"
            else:
                models_url = f"{base}/v1/models"

            headers = {}
            if LLM_API_KEY:
                headers["Authorization"] = f"Bearer {LLM_API_KEY}"

            try:
                # The context manager closes the client's connection pool.
                with httpx.Client(timeout=5.0) as client:
                    response = client.get(models_url, headers=headers)
            except httpx.HTTPError as e:
                logger.warning(f"本地 VLLM 服务响应异常: {e}")
                return False

            if response.status_code == 200:
                logger.info(f"本地 VLLM 服务可用: {self._model}")
                return True

            # Any non-200 answer is treated as "service unavailable".
            logger.warning(f"本地 VLLM 服务响应异常: HTTP {response.status_code}")
            return False
        except Exception as e:
            # Last-resort boundary: availability checks must never raise.
            logger.warning(f"本地 VLLM 服务不可用: {e}")
            return False

    def get_service(self) -> BaseChatModel:
        """Return the chat model for the local vLLM service, creating it on first use.

        The instance is cached on ``self._service_instance`` (presumably
        initialised by the base class; confirm in ``.base``).

        Returns:
            BaseChatModel: LangChain-compatible chat model instance.
        """
        if self._service_instance is None:
            from langchain_openai import ChatOpenAI
            from pydantic import SecretStr

            self._service_instance = ChatOpenAI(
                base_url=VLLM_BASE_URL,
                # An empty key is passed when LLM_API_KEY is not configured.
                api_key=SecretStr(LLM_API_KEY or ""),
                model=self._model,
                timeout=60.0,
                max_retries=2,
                streaming=True,
            )
        return self._service_instance
class ZhipuChatProvider(BaseServiceProvider[BaseChatModel]):
    """Provider for the Zhipu AI chat model (built with ``ChatZhipuAI``)."""

    def __init__(self, model: str = "glm-5.1"):
        """Register the provider under the name ``zhipu_chat``.

        Args:
            model: Model identifier passed to ``ChatZhipuAI``.
        """
        super().__init__("zhipu_chat")
        self._model = model

    def is_available(self) -> bool:
        """Report whether the Zhipu AI service is configured.

        Only the presence of ``ZHIPUAI_API_KEY`` is checked; no network
        request is made.

        Returns:
            bool: ``True`` when an API key is configured, ``False`` otherwise.
        """
        if not ZHIPUAI_API_KEY:
            logger.warning("ZHIPUAI_API_KEY 未配置")
            return False
        # No try/except here: the only remaining statement is a log call,
        # matching the lightweight Zhipu provider in this module.
        logger.info(f"智谱 AI 服务配置正确,准备使用: {self._model}")
        return True

    def get_service(self) -> BaseChatModel:
        """Return the Zhipu AI chat model, creating it on first use.

        The instance is cached on ``self._service_instance`` (presumably
        initialised by the base class; confirm in ``.base``).

        Returns:
            BaseChatModel: LangChain-compatible chat model instance.
        """
        if self._service_instance is None:
            from langchain_community.chat_models import ChatZhipuAI

            self._service_instance = ChatZhipuAI(
                model=self._model,
                api_key=ZHIPUAI_API_KEY,
                temperature=0.1,
                max_tokens=4096,
                timeout=120.0,
                max_retries=3,
                streaming=True,
            )
        return self._service_instance
class DeepSeekChatProvider(BaseServiceProvider[BaseChatModel]):
    """Provider for the DeepSeek chat model (built with ``ChatOpenAI``)."""

    def __init__(self, model: str = "deepseek-v4-pro"):
        """Register the provider under the name ``deepseek_chat``.

        Args:
            model: Model identifier passed to ``ChatOpenAI``.
        """
        super().__init__("deepseek_chat")
        self._model = model

    def is_available(self) -> bool:
        """Report whether the DeepSeek service is configured.

        Only the presence of ``DEEPSEEK_API_KEY`` is checked; no network
        request is made.

        Returns:
            bool: ``True`` when an API key is configured, ``False`` otherwise.
        """
        if not DEEPSEEK_API_KEY:
            logger.warning("DEEPSEEK_API_KEY 未配置")
            return False
        # No try/except here: the only remaining statement is a log call,
        # matching the lightweight DeepSeek provider in this module.
        logger.info(f"DeepSeek 服务配置正确,准备使用: {self._model}")
        return True

    def get_service(self) -> BaseChatModel:
        """Return the DeepSeek chat model, creating it on first use.

        The instance is cached on ``self._service_instance`` (presumably
        initialised by the base class; confirm in ``.base``).

        Returns:
            BaseChatModel: LangChain-compatible chat model instance.
        """
        if self._service_instance is None:
            from langchain_openai import ChatOpenAI
            from pydantic import SecretStr

            self._service_instance = ChatOpenAI(
                base_url="https://api.deepseek.com",
                api_key=SecretStr(DEEPSEEK_API_KEY),
                model=self._model,
                temperature=0.1,
                max_tokens=4096,
                timeout=60.0,
                max_retries=2,
                streaming=True,
            )
        return self._service_instance
# ========== 轻量级模型 Provider ==========
class ZhipuSmallModelProvider(BaseServiceProvider[BaseChatModel]):
    """Provider for a lightweight Zhipu AI model.

    Intended for simple tasks such as intent classification; defaults to
    ``glm-5.1-flash`` but any other small model name may be supplied.
    """

    def __init__(self, model: str = "glm-5.1-flash"):
        """Register the provider under the name ``zhipu_small``."""
        super().__init__("zhipu_small")
        self._model = model

    def is_available(self) -> bool:
        """Return ``True`` when ``ZHIPUAI_API_KEY`` is configured, else ``False``."""
        key_configured = bool(ZHIPUAI_API_KEY)
        if key_configured:
            logger.info(f"智谱轻量模型配置正确: {self._model}")
        else:
            logger.warning("ZHIPUAI_API_KEY 未配置,轻量模型不可用")
        return key_configured

    def get_service(self) -> BaseChatModel:
        """Return the lightweight Zhipu chat model, building it on first use."""
        # Reuse the cached instance when one already exists.
        if self._service_instance is not None:
            return self._service_instance

        from langchain_community.chat_models import ChatZhipuAI

        options = {
            "model": self._model,
            "api_key": ZHIPUAI_API_KEY,
            "temperature": 0.1,
            "max_tokens": 2048,
            "timeout": 30.0,
            "max_retries": 2,
            "streaming": False,
        }
        self._service_instance = ChatZhipuAI(**options)
        return self._service_instance
class DeepSeekSmallModelProvider(BaseServiceProvider[BaseChatModel]):
    """Provider for a lightweight DeepSeek model (the backup small model)."""

    def __init__(self, model: str = "deepseek-chat"):
        """Register the provider under the name ``deepseek_small``."""
        super().__init__("deepseek_small")
        self._model = model

    def is_available(self) -> bool:
        """Return ``True`` when ``DEEPSEEK_API_KEY`` is configured, else ``False``."""
        key_configured = bool(DEEPSEEK_API_KEY)
        if key_configured:
            logger.info(f"DeepSeek 轻量模型配置正确: {self._model}")
        else:
            logger.warning("DEEPSEEK_API_KEY 未配置")
        return key_configured

    def get_service(self) -> BaseChatModel:
        """Return the lightweight DeepSeek chat model, building it on first use."""
        # Reuse the cached instance when one already exists.
        if self._service_instance is not None:
            return self._service_instance

        from langchain_openai import ChatOpenAI
        from pydantic import SecretStr

        options = {
            "base_url": "https://api.deepseek.com",
            "api_key": SecretStr(DEEPSEEK_API_KEY),
            "model": self._model,
            "temperature": 0.1,
            "max_tokens": 2048,
            "timeout": 30.0,
            "max_retries": 2,
            "streaming": False,
        }
        self._service_instance = ChatOpenAI(**options)
        return self._service_instance
# Global registry: short model name -> zero-argument factory returning a new
# provider. The factories are lambdas, so the provider classes are looked up
# only when a factory is called.
CHAT_PROVIDERS: Dict[str, Callable[[], BaseServiceProvider[BaseChatModel]]] = dict(
    local=lambda: LocalVLLMChatProvider(),
    zhipu=lambda: ZhipuChatProvider(),
    deepseek=lambda: DeepSeekChatProvider(),
)
def get_chat_service() -> BaseChatModel:
    """Return the default chat model service, with automatic fallback.

    Providers are tried in the order local -> zhipu -> deepseek. The fallback
    chain is obtained through ``SingletonServiceManager`` under the key
    ``"chat_service_chain"``.

    Returns:
        BaseChatModel: LangChain-compatible chat model instance.
    """
    def _build_default_chain():
        # Primary provider first, then the ordered list of fallbacks.
        return FallbackServiceChain(
            LocalVLLMChatProvider(),
            [ZhipuChatProvider(), DeepSeekChatProvider()],
        )

    return SingletonServiceManager.get_or_create(
        "chat_service_chain", _build_default_chain
    ).get_available_service()
def get_all_chat_services() -> Dict[str, BaseChatModel]:
    """Collect every chat model service that is currently available.

    Each factory in ``CHAT_PROVIDERS`` is invoked; providers that report
    themselves unavailable, or that raise while being set up, are logged and
    skipped. Used for multi-model switching.

    Returns:
        Dict[str, BaseChatModel]: Mapping of model name to chat model instance.

    Raises:
        RuntimeError: If no provider at all is available.
    """
    available: Dict[str, BaseChatModel] = {}
    for model_name, make_provider in CHAT_PROVIDERS.items():
        name = model_name  # referenced by the log messages below
        try:
            candidate = make_provider()
            if not candidate.is_available():
                logger.warning(f"模型 '{name}' 不可用,跳过")
                continue
            logger.info(f"模型 '{name}' 可用")
            available[name] = candidate.get_service()
        except Exception as e:
            # One broken provider must not prevent the others from loading.
            logger.warning(f"初始化模型 '{name}' 失败: {e}")

    if available:
        return available
    raise RuntimeError(f"没有可用的生成式大模型,尝试了: {list(CHAT_PROVIDERS.keys())}")
def get_small_llm_service() -> BaseChatModel:
    """Return the lightweight model service used for simple tasks.

    Providers are tried in the order zhipu_small -> deepseek_small. If
    obtaining a lightweight service raises, the function falls back to
    ``get_chat_service()``.

    Returns:
        BaseChatModel: LangChain-compatible chat model instance.
    """
    def _build_small_chain():
        # Zhipu small model is primary; DeepSeek small model is the fallback.
        return FallbackServiceChain(
            ZhipuSmallModelProvider(),
            [DeepSeekSmallModelProvider()],
        )

    try:
        small_chain = SingletonServiceManager.get_or_create(
            "small_llm_chain", _build_small_chain
        )
        return small_chain.get_available_service()
    except Exception as e:
        logger.warning(f"轻量模型初始化失败,降级到默认大模型: {e}")
        return get_chat_service()