Files
ailine/backend/app/model_services/chat_services.py

403 lines
13 KiB
Python
Raw Normal View History

"""
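Generative chat-model service module.

This module provides a unified entry point for obtaining generative LLM
services and supports several models:
1. Local vLLM service: the local gemma-4-E4B-it model
2. Zhipu AI: the glm-5.1 model
3. DeepSeek: the deepseek-v4-pro model

Main components:
- LocalVLLMChatProvider: provider for the local vLLM service
- ZhipuChatProvider: provider for the Zhipu API
- DeepSeekChatProvider: provider for the DeepSeek API
- get_chat_service(): get the default service (with automatic fallback)
- get_all_chat_services(): get every available model service (for multi-model switching)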
"""
import logging
from typing import Callable, Dict, Optional

from langchain_core.language_models import BaseChatModel

from app.config import (
    VLLM_BASE_URL,
    LLM_API_KEY,
    ZHIPUAI_API_KEY,
    DEEPSEEK_API_KEY,
    LOCAL_MODEL_NAME,
)
from .base import (
    BaseServiceProvider,
    FallbackServiceChain,
    SingletonServiceManager,
)
logger = logging.getLogger(__name__)
class LocalVLLMChatProvider(BaseServiceProvider[BaseChatModel]):
    """Chat-model provider backed by a local vLLM server.

    The endpoint and credentials are read from the module-level
    ``VLLM_BASE_URL`` and ``LLM_API_KEY`` settings.
    """

    def __init__(self, model: Optional[str] = None):
        """Create the provider.

        Args:
            model: Model name to request from the server. Falls back to
                ``LOCAL_MODEL_NAME`` when omitted or empty.
        """
        super().__init__("local_vllm_chat")
        self._model = model or LOCAL_MODEL_NAME

    def is_available(self) -> bool:
        """Check whether the local vLLM server is reachable and responding.

        The check runs in two stages: a TCP connect with a 2-second timeout
        so that an unreachable host fails fast, then a ``GET /models``
        request (5-second timeout) that must return HTTP 200.

        Returns:
            bool: ``True`` only when both stages succeed. Failures are
            logged and reported as ``False`` rather than raised.
        """
        if not VLLM_BASE_URL:
            logger.warning("VLLM_BASE_URL 未配置")
            return False

        try:
            # Imported lazily so that a missing httpx install is reported as
            # "unavailable" by the outer handler instead of breaking import.
            import socket
            from urllib.parse import urlparse

            import httpx

            parsed_url = urlparse(VLLM_BASE_URL)
            host = parsed_url.hostname
            port = parsed_url.port or (80 if parsed_url.scheme == 'http' else 443)

            # A URL without a scheme/host yields hostname=None; reject it
            # explicitly, because create_connection() would otherwise treat
            # a None host as the local machine.
            if not host:
                logger.warning(f"本地 VLLM 服务无法连接: {host}:{port} - 无法从 VLLM_BASE_URL 解析主机名")
                return False

            # Stage 1: fast-fail TCP probe. create_connection() resolves both
            # IPv4 and IPv6 addresses, and the context manager closes the
            # socket on every path.
            try:
                with socket.create_connection((host, port), timeout=2.0):
                    pass
            except OSError as e:
                logger.warning(f"本地 VLLM 服务无法连接: {host}:{port} - {e}")
                return False

            headers = {}
            if LLM_API_KEY:
                headers["Authorization"] = f"Bearer {LLM_API_KEY}"

            # Stage 2: lightweight API call. The client is closed by the
            # context manager so its connection pool is not leaked.
            with httpx.Client(base_url=VLLM_BASE_URL.rstrip('/'), timeout=5.0) as client:
                try:
                    status_code = client.get("/models", headers=headers).status_code
                except httpx.HTTPError as e:
                    logger.debug(f"本地 VLLM 服务 /models 请求失败: {e}")
                    status_code = None

            if status_code == 200:
                logger.info(f"本地 VLLM 服务可用: {self._model}")
                return True

            if status_code is not None:
                logger.debug(f"本地 VLLM 服务 /models 返回状态码: {status_code}")
            # Any failed or non-200 /models call counts as unavailable.
            logger.warning("本地 VLLM 服务响应异常")
            return False
        except Exception as e:
            # Boundary handler: an availability probe must never raise.
            logger.warning(f"本地 VLLM 服务不可用: {e}")
            return False

    def get_service(self) -> BaseChatModel:
        """Return the chat model for the local vLLM server.

        The ``ChatOpenAI`` client is built on the first call and cached on
        ``self._service_instance`` (presumably initialised by the base
        class); later calls return the same object.

        Returns:
            BaseChatModel: A LangChain-compatible chat model instance.
        """
        if self._service_instance is None:
            from langchain_openai import ChatOpenAI
            from pydantic import SecretStr

            self._service_instance = ChatOpenAI(
                base_url=VLLM_BASE_URL,
                # An empty key is passed when LLM_API_KEY is not configured.
                api_key=SecretStr(LLM_API_KEY or ""),
                model=self._model,
                timeout=60.0,
                max_retries=2,
                streaming=True,
            )
        return self._service_instance
class ZhipuChatProvider(BaseServiceProvider[BaseChatModel]):
    """Chat-model provider for the Zhipu AI hosted API."""

    def __init__(self, model: str = "glm-5.1"):
        """Create the provider.

        Args:
            model: Name of the Zhipu model to use.
        """
        super().__init__("zhipu_chat")
        self._model = model

    def is_available(self) -> bool:
        """Report whether the Zhipu AI provider is configured.

        Only the presence of ``ZHIPUAI_API_KEY`` is checked; no request is
        sent to the remote API.

        Returns:
            bool: ``True`` when an API key is configured, ``False`` otherwise.
        """
        if not ZHIPUAI_API_KEY:
            logger.warning("ZHIPUAI_API_KEY 未配置")
            return False

        logger.info(f"智谱 AI 服务配置正确,准备使用: {self._model}")
        return True

    def get_service(self) -> BaseChatModel:
        """Return the Zhipu AI chat model.

        The ``ChatZhipuAI`` client is built on the first call and cached on
        ``self._service_instance`` (presumably initialised by the base
        class); later calls return the same object.

        Returns:
            BaseChatModel: A LangChain-compatible chat model instance.
        """
        if self._service_instance is None:
            from langchain_community.chat_models import ChatZhipuAI

            self._service_instance = ChatZhipuAI(
                model=self._model,
                api_key=ZHIPUAI_API_KEY,
                temperature=0.1,
                max_tokens=4096,
                timeout=120.0,
                max_retries=3,
                streaming=True,
            )
        return self._service_instance
class DeepSeekChatProvider(BaseServiceProvider[BaseChatModel]):
    """Chat-model provider for the DeepSeek hosted API."""

    def __init__(self, model: str = "deepseek-v4-pro"):
        """Create the provider.

        Args:
            model: Name of the DeepSeek model to use.
        """
        super().__init__("deepseek_chat")
        self._model = model

    def is_available(self) -> bool:
        """Report whether the DeepSeek provider is configured.

        Only the presence of ``DEEPSEEK_API_KEY`` is checked; no request is
        sent to the remote API.

        Returns:
            bool: ``True`` when an API key is configured, ``False`` otherwise.
        """
        if not DEEPSEEK_API_KEY:
            logger.warning("DEEPSEEK_API_KEY 未配置")
            return False

        logger.info(f"DeepSeek 服务配置正确,准备使用: {self._model}")
        return True

    def get_service(self) -> BaseChatModel:
        """Return the DeepSeek chat model.

        DeepSeek is accessed through ``ChatOpenAI`` pointed at the DeepSeek
        endpoint. The client is built on the first call and cached on
        ``self._service_instance`` (presumably initialised by the base
        class); later calls return the same object.

        Returns:
            BaseChatModel: A LangChain-compatible chat model instance.
        """
        if self._service_instance is None:
            from langchain_openai import ChatOpenAI
            from pydantic import SecretStr

            self._service_instance = ChatOpenAI(
                base_url="https://api.deepseek.com",
                api_key=SecretStr(DEEPSEEK_API_KEY),
                model=self._model,
                temperature=0.1,
                max_tokens=4096,
                timeout=60.0,
                max_retries=2,
                streaming=True,
            )
        return self._service_instance
# ========== Lightweight model providers ==========
class LocalSmallModelProvider(BaseServiceProvider[BaseChatModel]):
    """Provider for a local lightweight model.

    Intended for simple tasks such as query rewriting and intent
    classification. It uses its own ``SMALL_*`` configuration values rather
    than the settings of the main local model.
    """

    def __init__(self, model: Optional[str] = None):
        """Create the provider.

        Args:
            model: Model name to request from the server. Falls back to
                ``SMALL_LOCAL_MODEL_NAME`` when omitted or empty.
        """
        from app.config import SMALL_LOCAL_MODEL_NAME, SMALL_VLLM_BASE_URL, SMALL_LLM_API_KEY

        super().__init__("local_small")
        self._model = model or SMALL_LOCAL_MODEL_NAME
        self._base_url = SMALL_VLLM_BASE_URL
        self._api_key = SMALL_LLM_API_KEY

    def is_available(self) -> bool:
        """Check whether the local small-model server is reachable and responding.

        The check runs in two stages: a TCP connect with a 2-second timeout
        so that an unreachable host fails fast, then a ``GET /models``
        request (5-second timeout) that must return HTTP 200.

        Returns:
            bool: ``True`` only when both stages succeed. Failures are
            logged and reported as ``False`` rather than raised.
        """
        if not self._base_url:
            logger.warning("SMALL_VLLM_BASE_URL 未配置,本地小模型不可用")
            return False

        try:
            # Imported lazily so that a missing httpx install is reported as
            # "unavailable" by the outer handler instead of breaking import.
            import socket
            from urllib.parse import urlparse

            import httpx

            parsed_url = urlparse(self._base_url)
            host = parsed_url.hostname
            port = parsed_url.port or (80 if parsed_url.scheme == 'http' else 443)

            # A URL without a scheme/host yields hostname=None; reject it
            # explicitly, because create_connection() would otherwise treat
            # a None host as the local machine.
            if not host:
                logger.warning(f"本地小模型服务无法连接: {host}:{port} - 无法从 SMALL_VLLM_BASE_URL 解析主机名")
                return False

            # Stage 1: fast-fail TCP probe. create_connection() resolves both
            # IPv4 and IPv6 addresses, and the context manager closes the
            # socket on every path.
            try:
                with socket.create_connection((host, port), timeout=2.0):
                    pass
            except OSError as e:
                logger.warning(f"本地小模型服务无法连接: {host}:{port} - {e}")
                return False

            headers = {}
            if self._api_key:
                headers["Authorization"] = f"Bearer {self._api_key}"

            # Stage 2: lightweight API call. The client is closed by the
            # context manager so its connection pool is not leaked.
            with httpx.Client(base_url=self._base_url.rstrip('/'), timeout=5.0) as client:
                try:
                    status_code = client.get("/models", headers=headers).status_code
                except httpx.HTTPError as e:
                    logger.debug(f"本地小模型服务 /models 请求失败: {e}")
                    status_code = None

            if status_code == 200:
                logger.info(f"本地小模型服务可用: {self._model}")
                return True

            if status_code is not None:
                logger.debug(f"本地小模型服务 /models 返回状态码: {status_code}")
            logger.warning("本地小模型服务响应异常")
            return False
        except Exception as e:
            # Boundary handler: an availability probe must never raise.
            logger.warning(f"本地小模型服务不可用: {e}")
            return False

    def get_service(self) -> BaseChatModel:
        """Return the chat model for the local small-model server.

        The ``ChatOpenAI`` client (non-streaming) is built on the first call
        and cached on ``self._service_instance`` (presumably initialised by
        the base class); later calls return the same object.

        Returns:
            BaseChatModel: A LangChain-compatible chat model instance.
        """
        if self._service_instance is None:
            from langchain_openai import ChatOpenAI
            from pydantic import SecretStr

            self._service_instance = ChatOpenAI(
                base_url=self._base_url,
                # An empty key is passed when no API key is configured.
                api_key=SecretStr(self._api_key or ""),
                model=self._model,
                timeout=30.0,
                max_retries=2,
                streaming=False,
            )
        return self._service_instance
class DeepSeekSmallModelProvider(BaseServiceProvider[BaseChatModel]):
    """Provider for a lightweight DeepSeek model.

    Intended for simple tasks such as query rewriting and intent
    classification. It uses its own ``SMALL_DEEPSEEK_*`` configuration
    values rather than the settings of the main DeepSeek provider.
    """

    # Endpoint used when SMALL_DEEPSEEK_API_BASE is not configured.
    _DEFAULT_API_BASE = "https://api.deepseek.com"

    def __init__(self, model: Optional[str] = None):
        """Create the provider.

        Args:
            model: Model name to use. Falls back to ``SMALL_DEEPSEEK_MODEL``
                when omitted or empty.
        """
        from app.config import SMALL_DEEPSEEK_MODEL, SMALL_DEEPSEEK_API_KEY, SMALL_DEEPSEEK_API_BASE

        super().__init__("deepseek_small")
        self._model = model or SMALL_DEEPSEEK_MODEL
        self._api_key = SMALL_DEEPSEEK_API_KEY
        # Never hand an empty base URL to ChatOpenAI: it would then pick its
        # own default endpoint, and the DeepSeek key would be sent there.
        self._api_base = SMALL_DEEPSEEK_API_BASE or self._DEFAULT_API_BASE

    def is_available(self) -> bool:
        """Report whether the lightweight DeepSeek provider is configured.

        Only the presence of the API key is checked; no request is sent to
        the remote API.

        Returns:
            bool: ``True`` when an API key is configured, ``False`` otherwise.
        """
        if not self._api_key:
            logger.warning("SMALL_DEEPSEEK_API_KEY 未配置")
            return False

        logger.info(f"DeepSeek 轻量模型配置正确: {self._model}")
        return True

    def get_service(self) -> BaseChatModel:
        """Return the lightweight DeepSeek chat model.

        The ``ChatOpenAI`` client (non-streaming) is built on the first call
        and cached on ``self._service_instance`` (presumably initialised by
        the base class); later calls return the same object.

        Returns:
            BaseChatModel: A LangChain-compatible chat model instance.
        """
        if self._service_instance is None:
            from langchain_openai import ChatOpenAI
            from pydantic import SecretStr

            self._service_instance = ChatOpenAI(
                base_url=self._api_base,
                api_key=SecretStr(self._api_key),
                model=self._model,
                temperature=0.1,
                max_tokens=2048,
                timeout=30.0,
                max_retries=2,
                streaming=False,
            )
        return self._service_instance
# Global registry: short model name -> zero-argument factory that builds the
# matching provider. Each provider class is callable without arguments, so
# the class itself serves as the factory.
CHAT_PROVIDERS: Dict[str, Callable[[], BaseServiceProvider[BaseChatModel]]] = {
    "local": LocalVLLMChatProvider,
    "zhipu": ZhipuChatProvider,
    "deepseek": DeepSeekChatProvider,
}
def get_chat_service() -> BaseChatModel:
    """Return the default chat model, with automatic fallback.

    A fallback chain is built with the local vLLM provider as primary and
    Zhipu, then DeepSeek, as fallbacks. The chain is obtained through
    ``SingletonServiceManager`` under the key ``"chat_service_chain"``.

    Returns:
        BaseChatModel: A LangChain-compatible chat model instance.
    """

    def _create_chain():
        # Factory handed to the singleton manager.
        return FallbackServiceChain(
            LocalVLLMChatProvider(),
            [ZhipuChatProvider(), DeepSeekChatProvider()],
        )

    service_chain = SingletonServiceManager.get_or_create(
        "chat_service_chain", _create_chain
    )
    return service_chain.get_available_service()
def get_all_chat_services() -> Dict[str, BaseChatModel]:
    """Collect every chat model that is currently available.

    Each factory in ``CHAT_PROVIDERS`` is instantiated and probed. Providers
    that report unavailable, or that raise while being set up, are logged
    and skipped. Used for switching between multiple models.

    Returns:
        Dict[str, BaseChatModel]: Mapping of model name to chat model
        instance.

    Raises:
        RuntimeError: If no provider is available.
    """
    available = {}

    for name, make_provider in CHAT_PROVIDERS.items():
        try:
            candidate = make_provider()
            if not candidate.is_available():
                logger.warning(f"模型 '{name}' 不可用,跳过")
                continue
            logger.info(f"模型 '{name}' 可用")
            available[name] = candidate.get_service()
        except Exception as e:
            # One broken provider must not prevent the others from loading.
            logger.warning(f"初始化模型 '{name}' 失败: {e}")

    if available:
        return available

    raise RuntimeError(f"没有可用的生成式大模型,尝试了: {list(CHAT_PROVIDERS.keys())}")
def get_small_llm_service() -> BaseChatModel:
    """Return the lightweight chat model used for simple tasks.

    Simple tasks here means things like query rewriting and intent
    classification. The fallback chain uses the local small model as primary
    and the DeepSeek small model as the only fallback. The large-model
    providers are deliberately left out of this chain to avoid unnecessary
    token consumption. The chain is obtained through
    ``SingletonServiceManager`` under the key ``"small_llm_chain"``.

    Returns:
        BaseChatModel: A LangChain-compatible chat model instance.
    """

    def _create_small_chain():
        # Factory handed to the singleton manager.
        return FallbackServiceChain(
            LocalSmallModelProvider(),
            [DeepSeekSmallModelProvider()],
        )

    small_chain = SingletonServiceManager.get_or_create(
        "small_llm_chain", _create_small_chain
    )
    return small_chain.get_available_service()