Files
ailine/backend/app/core/web_search.py
root b5c15ef445
All checks were successful
构建并部署 AI Agent 服务 / deploy (push) Successful in 12m9s
refactor: 单图方案重构 + 动态模型选择 + chat_services优化
## 核心改动

### 1. 单图方案重构
- 删除了多图(self.graphs),改为单图(self.graph)
- 新增 MainGraphState.current_model 字段用于运行时注入模型
- llm_call 节点改为动态选择模型(create_dynamic_llm_call_node)

### 2. chat_services 优化
- 添加 _cached_services 缓存,避免重复初始化
- 新增 get_cached_chat_services() 函数,用于单图注入
- 新增 _check_http_service_available() 统一HTTP探测逻辑
- 减少重复代码,LocalVLLMChatProvider和LocalSmallModelProvider共用探测方法

### 3. AIAgentService 重构
- initialize() 只构建一次图,传入 chat_services 字典
- 新增 _resolve_model() 模型回退逻辑
- 新增 _build_invocation() 统一构建调用参数
- process_message() 和 process_message_stream() 改为注入 current_model
- 流式处理代码拆分,增加可读性

### 4. 新增和删除文件
- 新增:backend/app/main_graph/main_graph_builder.py(图构建)
- 新增:backend/app/main_graph/subgraph_wrapper.py(子图封装)
- 新增:tools/test/test_tavily_search.py(测试)
- 删除:backend/app/main_graph/graph.py(旧图)
- 删除:backend/app/main_graph/utils/main_graph_builder.py(旧构建器)
- 删除:backend/app/main_graph/utils/__init__.py

### 5. 其他更新
- README.md:新增模型服务使用情况详解章节
- backend/app/model_services/__init__.py:新增 get_cached_chat_services 导出

## 方案优势

- 内存优化:N张图 → 1张图
- 灵活性:运行时动态选择模型,支持同会话不同模型
- 性能:模型服务缓存,初始化仅一次
- 可维护性:减少重复代码,统一HTTP探测逻辑
2026-05-05 17:30:55 +08:00

253 lines
8.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
联网搜索公共工具 - 无需 API Key免费使用 DuckDuckGo
Web Search Public Utility - Free, no API Key, using DuckDuckGo
"""
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
from datetime import datetime
import requests
import warnings
import re
@dataclass
class SearchResult:
"""搜索结果数据类"""
title: str
url: str
snippet: str
source: str = "DuckDuckGo"
timestamp: datetime = None
def __post_init__(self):
if self.timestamp is None:
self.timestamp = datetime.now()
class WebSearchTool:
"""联网搜索公共工具类"""
def __init__(self, max_results: int = 5):
self.max_results = max_results
def search(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]:
"""
使用多种方式搜索,按优先级尝试
Args:
query: 搜索关键词
max_results: 返回结果数量,默认使用初始化时的设置
Returns:
搜索结果列表
"""
num_results = max_results or self.max_results
# 方式 1: Tavily (需要 API Key质量最高)
try:
return self._search_tavily(query, num_results)
except ImportError:
print("[WebSearch] tavily 未安装,尝试其他搜索方式")
except Exception as e:
if "API_KEY" in str(e) or "未配置" in str(e):
print(f"[WebSearch] Tavily API Key 未配置: {e}")
else:
print(f"[WebSearch] Tavily 搜索失败: {e}")
# 方式 2: 尝试用 ddgs 包
try:
from ddgs import DDGS
print(f"[WebSearch] 使用 ddgs 搜索: {query}")
with DDGS() as ddgs:
results = list(ddgs.text(query, max_results=num_results))
if results:
search_results = []
for r in results:
search_results.append(SearchResult(
title=r.get("title", ""),
url=r.get("href", ""),
snippet=r.get("body", ""),
source="DuckDuckGo"
))
print(f"[WebSearch] ddgs 返回 {len(search_results)} 条结果")
return search_results
except ImportError:
print("[WebSearch] ddgs 未安装,尝试 duckduckgo-search")
except Exception as e:
print(f"[WebSearch] ddgs 搜索失败: {e}")
# 方式 3: 尝试用简单 HTTP 请求
try:
return self._search_http(query, num_results)
except Exception as e:
print(f"[WebSearch] HTTP 搜索也失败: {e}")
# 方式 4: 返回模拟数据作为最后兜底
return self._search_mock(query, num_results)
def _search_tavily(self, query: str, max_results: int) -> List[SearchResult]:
"""使用 Tavily API 搜索"""
from tavily import TavilyClient
from app.config import TAVILY_API_KEY, TAVILY_MAX_RESULTS
if not TAVILY_API_KEY:
raise ValueError("TAVILY_API_KEY 未配置")
client = TavilyClient(api_key=TAVILY_API_KEY)
response = client.search(
query=query,
max_results=min(max_results, TAVILY_MAX_RESULTS or 5),
include_answer=True,
include_raw_content=False
)
results = []
for item in response.get("results", []):
results.append(SearchResult(
title=item.get("title", ""),
url=item.get("url", ""),
snippet=item.get("content", ""),
source="Tavily"
))
print(f"[WebSearch] Tavily 返回 {len(results)} 条结果")
return results
def _search_http(self, query: str, max_results: int) -> List[SearchResult]:
"""用简单 HTTP 请求搜索(备用方案)- 尝试多个国内源"""
print(f"[WebSearch] 尝试 HTTP 搜索")
# 方式 1: 尝试百度搜索(简单方式)
try:
return self._search_baidu(query, max_results)
except Exception as e:
print(f"[WebSearch] 百度搜索失败: {e}")
# 方式 2: 返回模拟数据
return self._search_mock(query, max_results)
def _search_baidu(self, query: str, max_results: int) -> List[SearchResult]:
"""尝试百度搜索"""
import requests
from urllib.parse import quote
url = f"https://www.baidu.com/s?wd={quote(query)}"
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
}
try:
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
# 简单解析百度搜索结果(简化版)
results = []
# 这里只是示意,真实百度搜索需要更复杂的解析
results.append(SearchResult(
title=f"百度搜索: {query}",
url=url,
snippet="如需要真实搜索结果,请考虑使用百度搜索 API",
source="百度"
))
return results
except Exception as e:
print(f"[WebSearch] 百度搜索也失败: {e}")
raise
def _search_mock(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]:
"""模拟搜索结果(兜底方案)"""
print(f"[WebSearch] 使用模拟搜索结果 (查询: {query})")
# 根据查询内容生成更有意义的模拟结果
mock_templates = [
{
"title": f"关于「{query}」的相关介绍",
"snippet": "这是模拟结果。如需真实搜索,请检查容器网络连接或配置代理。",
"url": "https://example.com/about"
},
{
"title": f"{query}」 - 最新动态",
"snippet": "提示:在容器内运行时,需要确保能访问外网。",
"url": "https://example.com/latest"
},
{
"title": f"了解更多关于「{query}」的内容",
"snippet": "建议:检查 Docker 网络配置,或使用代理。",
"url": "https://example.com/more"
}
]
num = max_results or self.max_results
results = []
for i, template in enumerate(mock_templates[:num]):
results.append(SearchResult(
title=template["title"],
url=template["url"],
snippet=template["snippet"],
source="模拟数据"
))
return results
def format_search_results(self, results: List[SearchResult]) -> str:
"""
格式化搜索结果(带引用溯源)
Args:
results: 搜索结果列表
Returns:
格式化后的 Markdown 文本
"""
if not results:
return "未找到相关搜索结果"
lines = []
lines.append("## 🔍 联网搜索结果\n")
for idx, result in enumerate(results, 1):
lines.append(f"### [{idx}] {result.title}")
lines.append(f"- 🔗 来源:[{result.url}]({result.url})")
lines.append(f"- 📝 摘要:{result.snippet}")
lines.append(f"- 📅 时间:{result.timestamp.strftime('%Y-%m-%d %H:%M:%S')}")
lines.append("")
# 添加引用溯源说明
lines.append("---")
lines.append("💡 **引用溯源说明**")
lines.append("- 以上搜索结果均标注了来源链接")
lines.append("- 使用方括号数字标识引用(如 [1]、[2]")
lines.append("- 可通过链接追溯原始信息")
return "\n".join(lines)
# 单例实例
_web_search_tool = None
def get_web_search_tool() -> WebSearchTool:
"""获取联网搜索工具单例"""
global _web_search_tool
if _web_search_tool is None:
_web_search_tool = WebSearchTool()
return _web_search_tool
def web_search(query: str, max_results: int = 5) -> str:
"""
便捷函数:联网搜索并返回格式化结果
Args:
query: 搜索关键词
max_results: 返回结果数量
Returns:
格式化后的搜索结果文本
"""
tool = get_web_search_tool()
results = tool.search(query, max_results)
return tool.format_search_results(results)