"""Web search public utility - free, no API key, backed by DuckDuckGo.

The search entry points try several backends in order and always return
something: the ``ddgs`` package, the legacy ``duckduckgo_search`` package,
a plain HTTP request to Baidu, and finally locally generated mock results.
"""

from typing import List, Dict, Any, Optional
from dataclasses import dataclass
from datetime import datetime
from importlib import import_module
from urllib.parse import quote
import warnings
import re

try:
    import requests
except ImportError:
    # ``requests`` is only needed by the Baidu fallback; a missing package
    # must not prevent the module (and the mock fallback) from loading.
    requests = None


@dataclass
class SearchResult:
    """A single search hit.

    ``timestamp`` records when the result object was created; it is filled
    in with the current local time when omitted or passed as ``None``.
    """

    title: str
    url: str
    snippet: str
    source: str = "DuckDuckGo"
    timestamp: Optional[datetime] = None

    def __post_init__(self):
        if self.timestamp is None:
            self.timestamp = datetime.now()


class WebSearchTool:
    """Web search helper with layered fallbacks."""

    # (module to import, label used in log lines, message printed when the
    # package is not installed) - tried in this order.
    _DDGS_BACKENDS = (
        ("ddgs", "ddgs", "[WebSearch] ddgs 未安装,尝试 duckduckgo-search"),
        (
            "duckduckgo_search",
            "duckduckgo-search",
            "[WebSearch] duckduckgo-search 未安装",
        ),
    )

    def __init__(self, max_results: int = 5):
        self.max_results = max_results

    def _resolve_limit(self, max_results: Optional[int]) -> int:
        """Return the effective result limit, never negative.

        Only ``None`` selects the instance default, so an explicit ``0`` is
        honoured instead of being mistaken for "not given".
        """
        limit = self.max_results if max_results is None else max_results
        return max(limit, 0)

    @staticmethod
    def _load_ddgs_class(module_name: str):
        """Import ``module_name`` and return its ``DDGS`` class.

        Raises:
            ImportError: if the module is missing or does not expose ``DDGS``
                (the same error ``from <module> import DDGS`` would raise).
        """
        module = import_module(module_name)
        try:
            return module.DDGS
        except AttributeError as exc:
            raise ImportError(
                f"cannot import name 'DDGS' from '{module_name}'"
            ) from exc

    def _search_ddgs_backend(
        self,
        module_name: str,
        label: str,
        missing_message: str,
        query: str,
        limit: int,
    ) -> List[SearchResult]:
        """Run one DDGS-style backend; return ``[]`` if it is unavailable,
        fails, or yields nothing, so the caller can try the next backend."""
        try:
            ddgs_cls = self._load_ddgs_class(module_name)
            print(f"[WebSearch] 使用 {label} 搜索: {query}")
            with ddgs_cls() as ddgs:
                raw_hits = list(ddgs.text(query, max_results=limit))
            if raw_hits:
                hits = [
                    SearchResult(
                        title=hit.get("title", ""),
                        url=hit.get("href", ""),
                        snippet=hit.get("body", ""),
                        source="DuckDuckGo",
                    )
                    for hit in raw_hits
                ]
                print(f"[WebSearch] {label} 返回 {len(hits)} 条结果")
                return hits
        except ImportError:
            print(missing_message)
        except Exception as e:
            # Best effort: any backend failure just moves on to the next one.
            print(f"[WebSearch] {label} 搜索失败: {e}")
        return []

    def search(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]:
        """Search the web, trying each backend in turn.

        Args:
            query: Search keywords.
            max_results: Number of results to return; ``None`` uses the value
                given at construction time. ``0`` or a negative value returns
                an empty list without contacting any backend.

        Returns:
            A list of search results. When every real backend fails, mock
            results (``source="模拟数据"``) are returned instead.
        """
        limit = self._resolve_limit(max_results)
        if limit == 0:
            return []

        # Methods 1 and 2: the ddgs package, then the legacy package.
        for module_name, label, missing_message in self._DDGS_BACKENDS:
            hits = self._search_ddgs_backend(
                module_name, label, missing_message, query, limit
            )
            if hits:
                return hits

        # Method 3: plain HTTP request.
        try:
            return self._search_http(query, limit)
        except Exception as e:
            print(f"[WebSearch] HTTP 搜索也失败: {e}")

        # Method 4: mock data as the last resort.
        return self._search_mock(query, limit)

    def _search_http(self, query: str, max_results: int) -> List[SearchResult]:
        """Fallback search over plain HTTP; degrades to mock data on failure."""
        print("[WebSearch] 尝试 HTTP 搜索")

        try:
            return self._search_baidu(query, max_results)
        except Exception as e:
            print(f"[WebSearch] 百度搜索失败: {e}")

        return self._search_mock(query, max_results)

    def _search_baidu(self, query: str, max_results: int) -> List[SearchResult]:
        """Probe Baidu for ``query`` and return a single placeholder result.

        The response body is not parsed: on a successful request the method
        returns one result that links to the Baidu results page. Errors are
        propagated so the caller logs them once and falls back.

        Raises:
            ImportError: if ``requests`` is not installed.
            Exception: whatever ``requests`` raises for a failed request or a
                non-success HTTP status.
        """
        if requests is None:
            raise ImportError("requests is not installed")

        url = f"https://www.baidu.com/s?wd={quote(query)}"
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }

        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # Placeholder only; real Baidu results would need proper HTML parsing.
        return [
            SearchResult(
                title=f"百度搜索: {query}",
                url=url,
                snippet="如需要真实搜索结果,请考虑使用百度搜索 API",
                source="百度",
            )
        ]

    def _search_mock(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]:
        """Build mock results (last-resort fallback).

        At most three templates exist, so no more than three results are
        returned regardless of the limit; ``0`` or a negative limit returns
        an empty list.
        """
        print(f"[WebSearch] 使用模拟搜索结果 (查询: {query})")

        mock_templates = [
            {
                "title": f"关于「{query}」的相关介绍",
                "snippet": "这是模拟结果。如需真实搜索,请检查容器网络连接或配置代理。",
                "url": "https://example.com/about",
            },
            {
                "title": f"「{query}」 - 最新动态",
                "snippet": "提示:在容器内运行时,需要确保能访问外网。",
                "url": "https://example.com/latest",
            },
            {
                "title": f"了解更多关于「{query}」的内容",
                "snippet": "建议:检查 Docker 网络配置,或使用代理。",
                "url": "https://example.com/more",
            },
        ]

        limit = self._resolve_limit(max_results)
        return [
            SearchResult(
                title=template["title"],
                url=template["url"],
                snippet=template["snippet"],
                source="模拟数据",
            )
            for template in mock_templates[:limit]
        ]

    def format_search_results(self, results: List[SearchResult]) -> str:
        """Format search results as Markdown with numbered source citations.

        Args:
            results: Search results to render.

        Returns:
            Markdown text, or a short "no results" message when ``results``
            is empty.
        """
        if not results:
            return "未找到相关搜索结果"

        lines = ["## 🔍 联网搜索结果\n"]

        for idx, result in enumerate(results, 1):
            lines.extend(
                [
                    f"### [{idx}] {result.title}",
                    f"- 🔗 来源:[{result.url}]({result.url})",
                    f"- 📝 摘要:{result.snippet}",
                    f"- 📅 时间:{result.timestamp.strftime('%Y-%m-%d %H:%M:%S')}",
                    "",
                ]
            )

        # Trailing note explaining how the citations can be traced.
        lines.extend(
            [
                "---",
                "💡 **引用溯源说明**:",
                "- 以上搜索结果均标注了来源链接",
                "- 使用方括号数字标识引用(如 [1]、[2])",
                "- 可通过链接追溯原始信息",
            ]
        )

        return "\n".join(lines)


# Lazily created shared instance.
_web_search_tool = None


def get_web_search_tool() -> WebSearchTool:
    """Return the shared WebSearchTool instance, creating it on first use."""
    global _web_search_tool
    if _web_search_tool is None:
        _web_search_tool = WebSearchTool()
    return _web_search_tool


def web_search(query: str, max_results: int = 5) -> str:
    """Convenience wrapper: search the web and return formatted results.

    Args:
        query: Search keywords.
        max_results: Number of results to return.

    Returns:
        The formatted search results as Markdown text.
    """
    tool = get_web_search_tool()
    results = tool.search(query, max_results)
    return tool.format_search_results(results)