2026-04-29 23:10:15 +08:00
|
|
|
|
"""
|
|
|
|
|
|
联网搜索公共工具 - 无需 API Key,免费使用 DuckDuckGo
|
|
|
|
|
|
Web Search Public Utility - tries Tavily first (requires an API key), then falls back to free DuckDuckGo search, then to mock results
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
2026-05-08 22:30:26 +08:00
|
|
|
|
from typing import List, Optional
|
2026-04-29 23:10:15 +08:00
|
|
|
|
from dataclasses import dataclass
|
|
|
|
|
|
from datetime import datetime
|
2026-05-08 22:30:26 +08:00
|
|
|
|
|
|
|
|
|
|
from backend.app.logger import info
|
2026-04-29 23:10:15 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class SearchResult:
    """A single search hit produced by one of the search backends.

    Attributes:
        title: Title of the result.
        url: Link to the result.
        snippet: Short text excerpt of the result.
        source: Name of the backend that produced the result.
        timestamp: When the result object was created. If omitted (or passed
            as ``None``) it is filled in with ``datetime.now()`` right after
            construction.
    """

    title: str
    url: str
    snippet: str
    source: str = "DuckDuckGo"
    # ``None`` is only a "not supplied" marker; __post_init__ replaces it.
    timestamp: Optional[datetime] = None

    def __post_init__(self):
        """Stamp the result with the current time when none was supplied."""
        if self.timestamp is None:
            self.timestamp = datetime.now()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class WebSearchTool:
    """Shared web-search helper that tries several backends in turn."""

    def __init__(self, max_results: int = 5):
        self.max_results = max_results

    def search(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]:
        """
        Search with several backends, tried in priority order.

        Args:
            query: Search keywords.
            max_results: Number of results to return; a falsy value falls
                back to the limit given at construction time.

        Returns:
            List of search results.
        """
        limit = max_results or self.max_results

        # Providers in priority order; the first one that yields a
        # non-None answer wins.
        for provider in (self._try_tavily, self._try_ddgs):
            found = provider(query, limit)
            if found is not None:
                return found

        # Last resort: canned placeholder results.
        return self._get_mock_results(query, limit)

    def _try_tavily(self, query: str, max_results: int) -> Optional[List[SearchResult]]:
        """Attempt a Tavily search; return None (after logging) on any failure."""
        try:
            return self._search_tavily(query, max_results)
        except ImportError:
            info("[WebSearch] tavily 未安装")
        except Exception as exc:
            reason = str(exc)
            # A missing API key is recognized by the wording of the error message.
            key_missing = "API_KEY" in reason or "未配置" in reason
            if key_missing:
                info("[WebSearch] Tavily API Key 未配置")
            else:
                info(f"[WebSearch] Tavily 搜索失败: {exc}")
        return None

    def _search_tavily(self, query: str, max_results: int) -> List[SearchResult]:
        """Run the query through the Tavily API.

        Raises:
            ValueError: If ``TAVILY_API_KEY`` is not configured.
        """
        from tavily import TavilyClient
        from backend.app.config import TAVILY_API_KEY, TAVILY_MAX_RESULTS

        if not TAVILY_API_KEY:
            raise ValueError("TAVILY_API_KEY 未配置")

        tavily = TavilyClient(api_key=TAVILY_API_KEY)
        # Never request more than the configured cap (5 when the cap is unset).
        capped = min(max_results, TAVILY_MAX_RESULTS or 5)
        payload = tavily.search(
            query=query,
            max_results=capped,
            include_answer=True,
            include_raw_content=False,
        )

        hits = [
            SearchResult(
                title=entry.get("title", ""),
                url=entry.get("url", ""),
                snippet=entry.get("content", ""),
                source="Tavily",
            )
            for entry in payload.get("results", [])
        ]

        info(f"[WebSearch] Tavily 返回 {len(hits)} 条结果")
        return hits

    def _try_ddgs(self, query: str, max_results: int) -> Optional[List[SearchResult]]:
        """Attempt a DuckDuckGo search; return None on failure or when nothing is found."""
        try:
            from ddgs import DDGS

            with DDGS() as engine:
                hits = [
                    SearchResult(
                        title=row.get("title", ""),
                        url=row.get("href", ""),
                        snippet=row.get("body", ""),
                        source="DuckDuckGo",
                    )
                    for row in engine.text(query, max_results=max_results)
                ]

            # An empty list is treated like a failure so the caller moves on
            # to its next option.
            if hits:
                info(f"[WebSearch] ddgs 返回 {len(hits)} 条结果")
                return hits

        except ImportError:
            info("[WebSearch] ddgs 未安装")
        except Exception as exc:
            info(f"[WebSearch] ddgs 搜索失败: {exc}")

        return None

    def _get_mock_results(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]:
        """Build placeholder results (fallback used when no backend answered)."""
        info("[WebSearch] 使用模拟搜索结果")

        # (title, snippet, url) triples for the canned entries.
        canned = (
            (
                f"关于「{query}」的相关介绍",
                "这是模拟结果。如需真实搜索,请检查容器网络连接或配置代理。",
                "https://example.com/about",
            ),
            (
                f"「{query}」 - 最新动态",
                "提示:在容器内运行时,需要确保能访问外网。",
                "https://example.com/latest",
            ),
            (
                f"了解更多关于「{query}」的内容",
                "建议:检查 Docker 网络配置,或使用代理。",
                "https://example.com/more",
            ),
        )

        count = max_results or self.max_results

        return [
            SearchResult(
                title=mock_title,
                url=mock_url,
                snippet=mock_snippet,
                source="模拟数据",
            )
            for mock_title, mock_snippet, mock_url in canned[:count]
        ]

    def format_search_results(self, results: List[SearchResult], query: str = "") -> str:
        """
        Format search results by rendering them through a template.

        Args:
            results: List of search results.
            query: Search keywords.

        Returns:
            The text produced by rendering the "web_search_result" template,
            or a fixed "nothing found" message when ``results`` is empty.
        """
        if not results:
            return "未找到相关搜索结果"

        from backend.app.core import get_formatter
        formatter = get_formatter()

        # The template consumes plain dicts rather than dataclass instances.
        rows = [
            {
                "title": hit.title,
                "url": hit.url,
                "snippet": hit.snippet,
                "source": hit.source,
                "timestamp": hit.timestamp.strftime('%Y-%m-%d %H:%M:%S') if hit.timestamp else "",
            }
            for hit in results
        ]

        return formatter.render(
            "web_search_result",
            query=query,
            result_count=len(results),
            results=rows,
            citation_note="💡 **引用溯源说明**:以上搜索结果均标注了来源链接,可通过链接追溯原始信息。",
        )
|
2026-04-29 23:10:15 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Module-level singleton, created lazily by get_web_search_tool()
_web_search_tool = None


def get_web_search_tool() -> WebSearchTool:
    """Return the shared WebSearchTool instance, creating it on first use."""
    global _web_search_tool
    if _web_search_tool is not None:
        return _web_search_tool
    _web_search_tool = WebSearchTool()
    return _web_search_tool
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def web_search(query: str, max_results: int = 5) -> str:
    """
    Convenience wrapper: run a web search and return the formatted text.

    Args:
        query: Search keywords.
        max_results: Number of results to return.

    Returns:
        The search results formatted as text.
    """
    searcher = get_web_search_tool()
    hits = searcher.search(query, max_results)
    return searcher.format_search_results(hits, query=query)
|