2026-04-29 23:10:15 +08:00
|
|
|
|
"""
|
|
|
|
|
|
联网搜索公共工具 - 无需 API Key,免费使用 DuckDuckGo
|
|
|
|
|
|
Web Search Public Utility - Free, no API Key, using DuckDuckGo
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
from typing import List, Dict, Any, Optional
|
|
|
|
|
|
from dataclasses import dataclass
|
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class SearchResult:
    """A single search hit.

    Attributes:
        title: Title of the result.
        url: Link to the result.
        snippet: Short text excerpt of the result.
        source: Label of the backend that produced the result.
        timestamp: Time associated with the result. When left as ``None`` it
            is replaced with ``datetime.now()`` right after construction.
    """

    title: str
    url: str
    snippet: str
    source: str = "DuckDuckGo"
    # ``None`` is only a construction-time placeholder; __post_init__ swaps it
    # for the current time, so the annotation has to admit ``None``.
    timestamp: Optional[datetime] = None

    def __post_init__(self) -> None:
        """Fill in ``timestamp`` with the current local time if it was omitted."""
        if self.timestamp is None:
            self.timestamp = datetime.now()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class WebSearchTool:
    """Web search helper that queries DuckDuckGo and falls back to placeholder data.

    Real results come from the optional ``duckduckgo_search`` package. When
    that package is missing, raises, or returns nothing, the tool returns
    clearly labelled mock results instead of failing.
    """

    def __init__(self, max_results: int = 5):
        """Create the tool.

        Args:
            max_results: Default number of results used when a call does not
                specify its own count.
        """
        self.max_results = max_results

    def search(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]:
        """Search DuckDuckGo for ``query``.

        Args:
            query: Search keywords.
            max_results: Number of results to request. A falsy value
                (``None`` or ``0``) selects the count given at construction.

        Returns:
            A list of ``SearchResult``. If the DuckDuckGo lookup is
            unavailable, fails, or yields no hits, the list holds mock
            results (``source="模拟数据"``) instead.
        """
        num_results = max_results or self.max_results

        # Strategy 1: the optional duckduckgo-search package. It is imported
        # lazily so the module stays importable without it.
        try:
            from duckduckgo_search import DDGS

            print(f"[WebSearch] 使用 DuckDuckGo 搜索: {query}")

            with DDGS() as ddgs:
                results = list(ddgs.text(query, max_results=num_results))

            if results:
                search_results = [
                    SearchResult(
                        title=r.get("title", ""),
                        url=r.get("href", ""),
                        snippet=r.get("body", ""),
                        source="DuckDuckGo",
                    )
                    for r in results
                ]
                print(f"[WebSearch] DuckDuckGo 返回 {len(search_results)} 条结果")
                return search_results
        except ImportError:
            print("[WebSearch] duckduckgo-search 未安装,尝试备用方案")
        except Exception as e:
            # Best-effort boundary: any backend failure degrades to the
            # fallback below rather than propagating to the caller.
            print(f"[WebSearch] DuckDuckGo 搜索失败: {e}")

        # Strategy 2 (last resort): no real secondary search backend is
        # implemented, so the fallback is the mock generator. It needs no
        # network access and no extra packages.
        print("[WebSearch] 使用备用搜索方案")
        return self._search_mock(query, num_results)

    def _search_mock(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]:
        """Build placeholder results used when no real search is possible.

        Args:
            query: Search keywords; embedded in the mock titles.
            max_results: Number of results wanted. A falsy value selects the
                count given at construction. At most three mock results
                exist; a negative count yields an empty list.

        Returns:
            Up to three ``SearchResult`` objects with ``source="模拟数据"``.
        """
        print(f"[WebSearch] 使用模拟搜索结果 (查询: {query})")

        # Titles mention the query so the placeholders read as related to it.
        mock_templates = [
            {
                "title": f"关于「{query}」的相关介绍",
                "snippet": "这是一个模拟的搜索结果。在实际部署中,需要确保网络连接正常,或者配置其他可用的搜索API。",
                "url": "https://example.com/about",
            },
            {
                "title": f"「{query}」 - 最新动态",
                "snippet": "搜索功能暂时使用模拟数据。请检查容器网络配置,或联系管理员配置可用的搜索服务。",
                "url": "https://example.com/latest",
            },
            {
                "title": f"了解更多关于「{query}」的内容",
                "snippet": "提示:如果需要真实的联网搜索,可以考虑使用 Bing Search API、Google Custom Search JSON API 或其他商用搜索服务。",
                "url": "https://example.com/more",
            },
        ]

        num = max_results or self.max_results
        # Clamp at zero: a negative slice bound would drop items from the end
        # instead of returning nothing.
        return [
            SearchResult(
                title=template["title"],
                url=template["url"],
                snippet=template["snippet"],
                source="模拟数据",
            )
            for template in mock_templates[:max(num, 0)]
        ]

    def format_search_results(self, results: List[SearchResult]) -> str:
        """Render search results as Markdown with numbered, linked citations.

        Args:
            results: Search results to render.

        Returns:
            Markdown text with one numbered section per result followed by a
            citation note, or a fixed "no results" message when ``results``
            is empty.
        """
        if not results:
            return "未找到相关搜索结果"

        lines = ["## 🔍 联网搜索结果\n"]

        for idx, result in enumerate(results, 1):
            lines.extend([
                f"### [{idx}] {result.title}",
                f"- 🔗 来源:[{result.url}]({result.url})",
                f"- 📝 摘要:{result.snippet}",
                f"- 📅 时间:{result.timestamp.strftime('%Y-%m-%d %H:%M:%S')}",
                "",
            ])

        # Trailing note explaining how the numbered citations map to sources.
        lines.extend([
            "---",
            "💡 **引用溯源说明**:",
            "- 以上搜索结果均标注了来源链接",
            "- 使用方括号数字标识引用(如 [1]、[2])",
            "- 可通过链接追溯原始信息",
        ])

        return "\n".join(lines)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Module-level slot holding the shared WebSearchTool; stays None until the
# first call to get_web_search_tool().
_web_search_tool = None


def get_web_search_tool() -> WebSearchTool:
    """Return the shared ``WebSearchTool``, creating it on first use."""
    global _web_search_tool
    if _web_search_tool is not None:
        return _web_search_tool
    _web_search_tool = WebSearchTool()
    return _web_search_tool
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def web_search(query: str, max_results: int = 5) -> str:
    """Run a web search and return the formatted result text.

    Convenience wrapper around the shared tool returned by
    ``get_web_search_tool()``: it searches, then formats the results.

    Args:
        query: Search keywords.
        max_results: Number of results to request.

    Returns:
        The formatted search results as text.
    """
    searcher = get_web_search_tool()
    hits = searcher.search(query, max_results)
    return searcher.format_search_results(hits)
|