Files
ailine/backend/app/core/web_search.py
root b30f7b00a7
All checks were successful
构建并部署 AI Agent 服务 / deploy (push) Successful in 5m33s
优化查询代码,优化工具代码
2026-05-08 22:30:26 +08:00

218 lines
6.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
联网搜索公共工具 - 无需 API Key免费使用 DuckDuckGo
Web Search Public Utility - Free, no API Key, using DuckDuckGo
"""
from typing import List, Optional
from dataclasses import dataclass
from datetime import datetime
from backend.app.logger import info
@dataclass
class SearchResult:
    """A single search hit produced by one of the search providers.

    ``timestamp`` is filled in with the current local time (naive
    ``datetime.now()``) when the caller does not supply one, so it is
    never ``None`` after construction.
    """

    title: str  # Title of the result.
    url: str  # Link to the result.
    snippet: str  # Short text excerpt / summary of the result.
    source: str = "DuckDuckGo"  # Label of the provider that produced the hit.
    # ``None`` means "not supplied"; __post_init__ replaces it with the
    # construction time, hence the Optional annotation.
    timestamp: Optional[datetime] = None

    def __post_init__(self):
        """Default ``timestamp`` to the construction time when it is ``None``."""
        if self.timestamp is None:
            self.timestamp = datetime.now()
class WebSearchTool:
    """Web search helper that tries several providers in priority order.

    Providers are attempted in this order: Tavily, then DuckDuckGo (through
    the ``ddgs`` package), then locally generated placeholder results.
    """

    def __init__(self, max_results: int = 5):
        """Store the default number of results used by :meth:`search`."""
        self.max_results = max_results

    def search(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]:
        """Search for ``query``, trying each provider in priority order.

        Args:
            query: Search keywords.
            max_results: Number of results to return. A falsy value
                (``None`` or ``0``) selects the instance default.

        Returns:
            The result list of the first provider that does not report
            failure, or placeholder results when every provider fails.
        """
        num_results = max_results or self.max_results
        # A provider signals "unavailable / failed" with None. Note that an
        # empty list from Tavily is a valid answer and is returned as-is,
        # whereas _try_ddgs maps an empty result to None.
        for provider in (self._try_tavily, self._try_ddgs):
            found = provider(query, num_results)
            if found is not None:
                return found
        # Last resort: placeholder results.
        return self._get_mock_results(query, num_results)

    def _try_tavily(self, query: str, max_results: int) -> Optional[List[SearchResult]]:
        """Run the Tavily search, converting any failure into ``None``.

        Failures are logged rather than raised so that :meth:`search` can
        move on to the next provider.
        """
        try:
            return self._search_tavily(query, max_results)
        except ImportError:
            info("[WebSearch] tavily 未安装")
        except Exception as e:
            error_msg = str(e)
            # A missing key is recognised by the message text raised in
            # _search_tavily; it gets a dedicated log line.
            if "API_KEY" in error_msg or "未配置" in error_msg:
                info("[WebSearch] Tavily API Key 未配置")
            else:
                info(f"[WebSearch] Tavily 搜索失败: {e}")
        return None

    def _search_tavily(self, query: str, max_results: int) -> List[SearchResult]:
        """Query the Tavily API and map its response to ``SearchResult`` items.

        Raises:
            ImportError: If ``tavily`` or the config module cannot be imported.
            ValueError: If ``TAVILY_API_KEY`` is empty / not configured.
        """
        # Imported lazily so the module still loads when tavily is absent.
        from tavily import TavilyClient
        from backend.app.config import TAVILY_API_KEY, TAVILY_MAX_RESULTS

        if not TAVILY_API_KEY:
            raise ValueError("TAVILY_API_KEY 未配置")

        client = TavilyClient(api_key=TAVILY_API_KEY)
        response = client.search(
            query=query,
            # Cap the request at the configured maximum (5 when unset).
            max_results=min(max_results, TAVILY_MAX_RESULTS or 5),
            include_answer=True,
            include_raw_content=False,
        )
        results = [
            SearchResult(
                title=item.get("title", ""),
                url=item.get("url", ""),
                snippet=item.get("content", ""),
                source="Tavily",
            )
            for item in response.get("results", [])
        ]
        info(f"[WebSearch] Tavily 返回 {len(results)} 条结果")
        return results

    def _try_ddgs(self, query: str, max_results: int) -> Optional[List[SearchResult]]:
        """Run a DuckDuckGo search through ``ddgs``.

        Returns:
            A non-empty result list, or ``None`` when the package is
            missing, the search raises, or no results come back.
        """
        try:
            # Imported lazily so the module still loads when ddgs is absent.
            from ddgs import DDGS

            with DDGS() as ddgs:
                results = [
                    SearchResult(
                        title=r.get("title", ""),
                        url=r.get("href", ""),
                        snippet=r.get("body", ""),
                        source="DuckDuckGo",
                    )
                    for r in ddgs.text(query, max_results=max_results)
                ]
            if results:
                info(f"[WebSearch] ddgs 返回 {len(results)} 条结果")
                return results
        except ImportError:
            info("[WebSearch] ddgs 未安装")
        except Exception as e:
            info(f"[WebSearch] ddgs 搜索失败: {e}")
        return None

    def _get_mock_results(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]:
        """Build placeholder results used when every real provider failed.

        At most three placeholders exist; ``max_results`` (or the instance
        default when falsy) limits how many of them are returned.
        """
        info("[WebSearch] 使用模拟搜索结果")
        templates = [
            {
                "title": f"关于「{query}」的相关介绍",
                "snippet": "这是模拟结果。如需真实搜索,请检查容器网络连接或配置代理。",
                "url": "https://example.com/about",
            },
            {
                # Opening bracket restored so the query is quoted the same
                # way as in the other two titles.
                "title": f"「{query}」 - 最新动态",
                "snippet": "提示:在容器内运行时,需要确保能访问外网。",
                "url": "https://example.com/latest",
            },
            {
                "title": f"了解更多关于「{query}」的内容",
                "snippet": "建议:检查 Docker 网络配置,或使用代理。",
                "url": "https://example.com/more",
            },
        ]
        num = max_results or self.max_results
        return [
            SearchResult(
                title=template["title"],
                url=template["url"],
                snippet=template["snippet"],
                source="模拟数据",
            )
            for template in templates[:num]
        ]

    def format_search_results(self, results: List[SearchResult]) -> str:
        """Render search results as Markdown with numbered source citations.

        Args:
            results: Search results to render.

        Returns:
            Markdown text, or a short "no results" message when ``results``
            is empty.
        """
        if not results:
            return "未找到相关搜索结果"

        # The heading keeps its trailing newline so that a blank line
        # follows it once the pieces are joined.
        lines = ["## 🔍 联网搜索结果\n"]
        for idx, result in enumerate(results, 1):
            lines.extend((
                f"### [{idx}] {result.title}",
                f"- 🔗 来源:[{result.url}]({result.url})",
                f"- 📝 摘要:{result.snippet}",
                f"- 📅 时间:{result.timestamp.strftime('%Y-%m-%d %H:%M:%S')}",
                "",
            ))
        lines.extend((
            "---",
            "💡 **引用溯源说明**",
            "- 以上搜索结果均标注了来源链接",
            # Closing parenthesis added; the original text left it open.
            "- 使用方括号数字标识引用(如 [1]、[2])",
            "- 可通过链接追溯原始信息",
        ))
        return "\n".join(lines)
# Singleton instance, created lazily by get_web_search_tool().
_web_search_tool = None


def get_web_search_tool() -> WebSearchTool:
    """Return the shared WebSearchTool, constructing it on the first call."""
    global _web_search_tool
    if _web_search_tool is not None:
        return _web_search_tool
    _web_search_tool = WebSearchTool()
    return _web_search_tool
def web_search(query: str, max_results: int = 5) -> str:
    """Search the web and return the results as formatted text.

    Convenience wrapper around the shared search tool.

    Args:
        query: Search keywords.
        max_results: Number of results to return.

    Returns:
        The search results rendered by ``format_search_results``.
    """
    searcher = get_web_search_tool()
    return searcher.format_search_results(searcher.search(query, max_results))