Files
ailine/backend/app/core/web_search.py
root 4c119073bc
All checks were successful
构建并部署 AI Agent 服务 / deploy (push) Successful in 6m6s
优化输出
2026-05-09 01:51:18 +08:00

224 lines
6.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
联网搜索公共工具 - 无需 API Key,免费使用 DuckDuckGo
Web Search Public Utility - tries Tavily first when an API key is configured, then falls back to DuckDuckGo (free, no API key)
"""
from typing import List, Optional
from dataclasses import dataclass
from datetime import datetime
from backend.app.logger import info
@dataclass
class SearchResult:
    """A single web search hit.

    Attributes:
        title: Title of the result.
        url: Link to the result.
        snippet: Short text excerpt for the result.
        source: Name of the backend that produced the result.
        timestamp: When the result object was created; filled in
            automatically when omitted or passed as ``None``.
    """

    title: str
    url: str
    snippet: str
    source: str = "DuckDuckGo"
    # ``None`` is a placeholder meaning "stamp at construction time";
    # it is resolved in ``__post_init__``.
    timestamp: Optional[datetime] = None

    def __post_init__(self):
        """Default ``timestamp`` to the current local time when not supplied."""
        if self.timestamp is None:
            self.timestamp = datetime.now()
class WebSearchTool:
    """Web search helper that tries several backends in priority order.

    ``search`` tries Tavily first, then DuckDuckGo (through the ``ddgs``
    package), and finally falls back to placeholder results, so callers
    always receive a list.
    """

    def __init__(self, max_results: int = 5):
        """Store the default number of results requested per search."""
        self.max_results = max_results

    def search(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]:
        """Search for *query*, trying each backend in priority order.

        Args:
            query: Search keywords.
            max_results: Number of results to return. A falsy value
                (``None`` or ``0``) selects the instance default.

        Returns:
            The result of the first backend that does not return ``None``,
            or the mock results when every backend returns ``None``.
        """
        num_results = max_results or self.max_results

        # Each ``_try_*`` helper returns None to signal "move on to the next
        # backend".
        # NOTE(review): an empty list from Tavily is returned as-is, whereas
        # an empty ddgs result falls through to the mock data - confirm this
        # asymmetry is intended.
        for backend in (self._try_tavily, self._try_ddgs):
            results = backend(query, num_results)
            if results is not None:
                return results

        # Last resort: placeholder results.
        return self._get_mock_results(query, num_results)

    def _try_tavily(self, query: str, max_results: int) -> Optional[List[SearchResult]]:
        """Run a Tavily search, converting any failure into ``None``.

        Returns:
            The Tavily results (possibly an empty list), or ``None`` when an
            import inside ``_search_tavily`` fails or the search raises.
        """
        try:
            return self._search_tavily(query, max_results)
        except ImportError:
            info("[WebSearch] tavily 未安装")
        except Exception as e:
            # Deliberately broad: this backend is best-effort, so any error is
            # logged and the caller moves on to the next backend.
            error_msg = str(e)
            if "API_KEY" in error_msg or "未配置" in error_msg:
                info("[WebSearch] Tavily API Key 未配置")
            else:
                info(f"[WebSearch] Tavily 搜索失败: {e}")
        return None

    def _search_tavily(self, query: str, max_results: int) -> List[SearchResult]:
        """Query the Tavily API and convert the response to ``SearchResult`` objects.

        Args:
            query: Search keywords.
            max_results: Upper bound requested by the caller.

        Returns:
            One ``SearchResult`` per entry under the response's ``"results"`` key.

        Raises:
            ImportError: If ``tavily`` or the project config cannot be imported.
            ValueError: If ``TAVILY_API_KEY`` is empty.
        """
        # Imported lazily so that a missing optional dependency only disables
        # this backend instead of breaking module import.
        from tavily import TavilyClient
        from backend.app.config import TAVILY_API_KEY, TAVILY_MAX_RESULTS

        if not TAVILY_API_KEY:
            raise ValueError("TAVILY_API_KEY 未配置")

        client = TavilyClient(api_key=TAVILY_API_KEY)
        response = client.search(
            query=query,
            # Never request more than the configured cap (5 when the
            # configured value is falsy).
            max_results=min(max_results, TAVILY_MAX_RESULTS or 5),
            include_answer=True,
            include_raw_content=False,
        )
        results = [
            SearchResult(
                title=item.get("title", ""),
                url=item.get("url", ""),
                snippet=item.get("content", ""),
                source="Tavily",
            )
            for item in response.get("results", [])
        ]
        info(f"[WebSearch] Tavily 返回 {len(results)} 条结果")
        return results

    def _try_ddgs(self, query: str, max_results: int) -> Optional[List[SearchResult]]:
        """Run a DuckDuckGo search, converting any failure into ``None``.

        Returns:
            A non-empty list of results, or ``None`` when ``ddgs`` is not
            installed, the search raises, or no results come back.
        """
        try:
            from ddgs import DDGS

            with DDGS() as client:
                results = [
                    SearchResult(
                        title=r.get("title", ""),
                        url=r.get("href", ""),
                        snippet=r.get("body", ""),
                        source="DuckDuckGo",
                    )
                    for r in client.text(query, max_results=max_results)
                ]
            if results:
                info(f"[WebSearch] ddgs 返回 {len(results)} 条结果")
                return results
        except ImportError:
            info("[WebSearch] ddgs 未安装")
        except Exception as e:
            # Deliberately broad: best-effort backend, see ``_try_tavily``.
            info(f"[WebSearch] ddgs 搜索失败: {e}")
        return None

    @staticmethod
    def _mock_templates(query: str) -> List[dict]:
        """Build the placeholder title/snippet/url entries for *query*."""
        return [
            {
                "title": f"关于「{query}」的相关介绍",
                "snippet": "这是模拟结果。如需真实搜索,请检查容器网络连接或配置代理。",
                "url": "https://example.com/about",
            },
            {
                # The opening bracket was missing here, leaving an unbalanced
                # closing bracket in the rendered title.
                "title": f"「{query}」 - 最新动态",
                "snippet": "提示:在容器内运行时,需要确保能访问外网。",
                "url": "https://example.com/latest",
            },
            {
                "title": f"了解更多关于「{query}」的内容",
                "snippet": "建议:检查 Docker 网络配置,或使用代理。",
                "url": "https://example.com/more",
            },
        ]

    def _get_mock_results(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]:
        """Return placeholder results used when every real backend fails.

        Args:
            query: Search keywords, interpolated into the placeholder titles.
            max_results: Maximum number of placeholders to return. A falsy
                value selects the instance default. At most three
                placeholders exist.

        Returns:
            ``SearchResult`` objects whose ``source`` marks them as mock data.
        """
        info("[WebSearch] 使用模拟搜索结果")
        num = max_results or self.max_results
        return [
            SearchResult(
                title=template["title"],
                url=template["url"],
                snippet=template["snippet"],
                source="模拟数据",
            )
            for template in self._mock_templates(query)[:num]
        ]

    def format_search_results(self, results: List[SearchResult], query: str = "") -> str:
        """Render search results through the ``web_search_result`` template.

        Args:
            results: Search results to render.
            query: Search keywords, passed through to the template.

        Returns:
            The text produced by the formatter, or a fixed "no results"
            message when *results* is empty.
        """
        if not results:
            return "未找到相关搜索结果"

        from backend.app.core import get_formatter

        formatter = get_formatter()

        # The template receives plain dicts rather than dataclass instances.
        result_dicts = [
            {
                "title": r.title,
                "url": r.url,
                "snippet": r.snippet,
                "source": r.source,
                "timestamp": r.timestamp.strftime('%Y-%m-%d %H:%M:%S') if r.timestamp else "",
            }
            for r in results
        ]
        return formatter.render(
            "web_search_result",
            query=query,
            result_count=len(results),
            results=result_dicts,
            citation_note="💡 **引用溯源说明**:以上搜索结果均标注了来源链接,可通过链接追溯原始信息。",
        )
# Module-level singleton, created lazily on first access.
_web_search_tool = None


def get_web_search_tool() -> WebSearchTool:
    """Return the shared ``WebSearchTool``, creating it on the first call."""
    global _web_search_tool
    if _web_search_tool is not None:
        return _web_search_tool
    _web_search_tool = WebSearchTool()
    return _web_search_tool
def web_search(query: str, max_results: int = 5) -> str:
    """Search the web for *query* and return the formatted results.

    Convenience wrapper around the shared ``WebSearchTool`` instance.

    Args:
        query: Search keywords.
        max_results: Number of results to request.

    Returns:
        The formatted search result text.
    """
    searcher = get_web_search_tool()
    return searcher.format_search_results(
        searcher.search(query, max_results),
        query=query,
    )