1. 双模型服务 (llm + smallLLM)
   - 增加 get_small_llm_service() 函数
   - 支持智谱/DeepSeek 小模型作为轻量级选项
2. 前置混合路由
   - 规则快速分流(无 LLM,超快速)
   - 轻量级意图分类(smallLLM)
   - 快速路径:fast_chitchat, fast_rag, fast_tool
3. 自动升级机制
   - 快速路径失败 → 自动回到 React 循环
   - SSE 事件增强:intent_classified, path_decision, fast_path_*, escalation
4. 向后兼容
   - build_react_main_graph(use_hybrid_router=True/False)
   - 可选择启用或禁用混合路由
5. 更新 intent.py
   - 支持 use_small_llm 参数
   - 保留原有完整功能供 React 循环使用
This commit is contained in:
@@ -216,6 +216,75 @@ class DeepSeekChatProvider(BaseServiceProvider[BaseChatModel]):
|
||||
return self._service_instance
|
||||
|
||||
|
||||
# ========== 轻量级模型 Provider ==========
|
||||
|
||||
class ZhipuSmallModelProvider(BaseServiceProvider[BaseChatModel]):
    """Provider for a lightweight Zhipu AI chat model.

    Meant for simple tasks such as intent classification. Uses
    ``glm-5.1-flash`` by default; another small model name can be passed in.
    """

    def __init__(self, model: str = "glm-5.1-flash"):
        """Register the provider as ``"zhipu_small"`` and remember the model name.

        Args:
            model: Name of the Zhipu model to request.
        """
        super().__init__("zhipu_small")
        self._model = model

    def is_available(self) -> bool:
        """Return True when ``ZHIPUAI_API_KEY`` is set, logging the outcome.

        Only the presence of the key is checked; no request is sent.
        """
        if ZHIPUAI_API_KEY:
            logger.info(f"智谱轻量模型配置正确: {self._model}")
            return True

        logger.warning("ZHIPUAI_API_KEY 未配置,轻量模型不可用")
        return False

    def get_service(self) -> BaseChatModel:
        """Return the Zhipu chat model, building it on first use.

        The instance is stored on ``self._service_instance`` and reused by
        later calls (the attribute is presumably initialised to ``None`` by
        the base class -- verify against ``BaseServiceProvider``).
        """
        if self._service_instance is not None:
            return self._service_instance

        # Imported lazily so the dependency is only needed when this
        # provider is actually used.
        from langchain_community.chat_models import ChatZhipuAI

        client_options = dict(
            model=self._model,
            api_key=ZHIPUAI_API_KEY,
            temperature=0.1,
            max_tokens=2048,
            timeout=30.0,
            max_retries=2,
            streaming=False,
        )
        self._service_instance = ChatZhipuAI(**client_options)
        return self._service_instance
|
||||
|
||||
class DeepSeekSmallModelProvider(BaseServiceProvider[BaseChatModel]):
    """Provider for a lightweight DeepSeek chat model (backup option)."""

    def __init__(self, model: str = "deepseek-chat"):
        """Register the provider as ``"deepseek_small"`` and remember the model name.

        Args:
            model: Name of the DeepSeek model to request.
        """
        super().__init__("deepseek_small")
        self._model = model

    def is_available(self) -> bool:
        """Return True when ``DEEPSEEK_API_KEY`` is set, logging the outcome.

        Only the presence of the key is checked; no request is sent.
        """
        if DEEPSEEK_API_KEY:
            logger.info(f"DeepSeek 轻量模型配置正确: {self._model}")
            return True

        logger.warning("DEEPSEEK_API_KEY 未配置")
        return False

    def get_service(self) -> BaseChatModel:
        """Return the DeepSeek chat model, building it on first use.

        The client is a ``ChatOpenAI`` instance pointed at the DeepSeek base
        URL. It is stored on ``self._service_instance`` and reused by later
        calls (the attribute is presumably initialised to ``None`` by the
        base class -- verify against ``BaseServiceProvider``).
        """
        if self._service_instance is not None:
            return self._service_instance

        # Imported lazily so the dependencies are only needed when this
        # provider is actually used.
        from langchain_openai import ChatOpenAI
        from pydantic import SecretStr

        client_options = dict(
            base_url="https://api.deepseek.com",
            api_key=SecretStr(DEEPSEEK_API_KEY),
            model=self._model,
            temperature=0.1,
            max_tokens=2048,
            timeout=30.0,
            max_retries=2,
            streaming=False,
        )
        self._service_instance = ChatOpenAI(**client_options)
        return self._service_instance
|
||||
|
||||
|
||||
# 全局服务映射表 - 名称 -> Provider
|
||||
CHAT_PROVIDERS: Dict[str, Callable[[], BaseServiceProvider[BaseChatModel]]] = {
|
||||
"local": lambda: LocalVLLMChatProvider(),
|
||||
@@ -265,3 +334,24 @@ def get_all_chat_services() -> Dict[str, BaseChatModel]:
|
||||
raise RuntimeError(f"没有可用的生成式大模型,尝试了: {list(CHAT_PROVIDERS.keys())}")
|
||||
|
||||
return services
|
||||
|
||||
|
||||
def get_small_llm_service() -> BaseChatModel:
    """Return a lightweight chat model for simple tasks such as intent classification.

    Preference order: ``zhipu_small`` -> ``deepseek_small``. If obtaining a
    service from that chain raises, the failure is logged as a warning and
    the result of ``get_chat_service()`` is returned instead.

    Returns:
        BaseChatModel: A LangChain-compatible chat model instance.
    """

    def _build_chain():
        # Zhipu is the primary provider; DeepSeek is the only fallback.
        return FallbackServiceChain(
            ZhipuSmallModelProvider(),
            [DeepSeekSmallModelProvider()],
        )

    try:
        # The chain is requested under a fixed key; presumably
        # SingletonServiceManager builds it once and reuses it -- verify
        # against its implementation.
        small_chain = SingletonServiceManager.get_or_create("small_llm_chain", _build_chain)
        service = small_chain.get_available_service()
    except Exception as exc:
        # Deliberate best-effort boundary: any failure degrades to the
        # default chat model rather than propagating.
        logger.warning(f"轻量模型初始化失败,降级到默认大模型: {exc}")
        return get_chat_service()

    return service
|
||||
|
||||
Reference in New Issue
Block a user