From 2761eca1009e95c2b20b2718da40eb54a821fe42 Mon Sep 17 00:00:00 2001 From: root <953994191@qq.com> Date: Fri, 1 May 2026 00:13:19 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=AE=8C=E6=88=90=E8=81=94=E7=BD=91?= =?UTF-8?q?=E6=90=9C=E7=B4=A2=E5=8A=9F=E8=83=BD=E5=AE=9E=E7=8E=B0=E5=92=8C?= =?UTF-8?q?=E4=BE=9D=E8=B5=96=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/core/intent.py | 14 ++--- backend/app/core/web_search.py | 111 ++++++++++++++++++++++++--------- backend/requirements.txt | 2 +- 3 files changed, 90 insertions(+), 37 deletions(-) diff --git a/backend/app/core/intent.py b/backend/app/core/intent.py index 093724e..78b108a 100644 --- a/backend/app/core/intent.py +++ b/backend/app/core/intent.py @@ -235,17 +235,15 @@ class ReactIntentReasoner: result.metadata["target_subgraph"] = subgraph_name return result - # 2. 检查是否需要联网搜索 - web_search_keywords = ["最新", "今天", "近日", "热点", "新闻", "实时", "搜索", "网上", "互联网", "最新消息", "recent", "latest", "hot", "news", "search", "web"] + # 2. 检查是否需要联网搜索(谨慎触发) + # 只有用户明确要求搜索才触发 + web_search_keywords = ["搜索", "搜索一下", "帮我搜", "search for", "web search", "搜索资料"] has_web_search = any(kw in query_lower for kw in web_search_keywords) - # 检查是否包含年份(比如2024、2025等),通常需要最新信息 - import re - has_year = bool(re.search(r'202[3-9]|203[0-9]', query)) - if has_web_search or has_year: + if has_web_search: result.action = ReasoningAction.WEB_SEARCH - result.confidence = 0.85 if has_web_search else 0.7 - result.reasoning = "需要联网搜索最新信息" + result.confidence = 0.9 + result.reasoning = "用户明确要求联网搜索" result.metadata["need_web_search"] = True result.metadata["search_query"] = query return result diff --git a/backend/app/core/web_search.py b/backend/app/core/web_search.py index 071a60e..d7d0603 100644 --- a/backend/app/core/web_search.py +++ b/backend/app/core/web_search.py @@ -6,6 +6,9 @@ Web Search Public Utility - Free, no API Key, using DuckDuckGo from typing import List, Dict, Any, Optional from dataclasses import dataclass from datetime import datetime +import requests +import warnings +import re @dataclass @@ -30,7 +33,7 @@ class WebSearchTool: def search(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]: """ - 使用 DuckDuckGo 搜索 + 使用多种方式搜索 Args: query: 搜索关键词 @@ -40,16 +43,13 @@ class WebSearchTool: 搜索结果列表 """ num_results = max_results or self.max_results - - # 方法1: 尝试使用 duckduckgo-search 库 + + # 方式 1: 尝试用 ddgs 包 try: - from duckduckgo_search import DDGS - - print(f"[WebSearch] 使用 DuckDuckGo 搜索: {query}") - + from ddgs import DDGS + print(f"[WebSearch] 使用 ddgs 搜索: {query}") with DDGS() as ddgs: results = list(ddgs.text(query, max_results=num_results)) - if results: search_results = [] for r in results: @@ -59,30 +59,85 @@ class WebSearchTool: snippet=r.get("body", ""), source="DuckDuckGo" )) - - print(f"[WebSearch] DuckDuckGo 返回 {len(search_results)} 条结果") + print(f"[WebSearch] ddgs 返回 {len(search_results)} 条结果") return search_results except ImportError: - print("[WebSearch] duckduckgo-search 未安装,尝试备用方案") + print("[WebSearch] ddgs 未安装,尝试 duckduckgo-search") except Exception as e: - print(f"[WebSearch] DuckDuckGo 搜索失败: {e}") - - # 方法2: 尝试使用 requests 直接调用简单搜索API + print(f"[WebSearch] ddgs 搜索失败: {e}") + + # 方式 2: 尝试用旧的 duckduckgo-search 包 try: - import requests - - print(f"[WebSearch] 使用备用搜索方案") - - # 使用百度搜索的简易接口(仅作演示) - # 或者返回一些模拟的提示结果 - return self._search_mock(query, num_results) - + from duckduckgo_search import DDGS + print(f"[WebSearch] 使用 duckduckgo-search 搜索: {query}") + with DDGS() as ddgs: + results = list(ddgs.text(query, max_results=num_results)) + if results: + search_results = [] + for r in results: + search_results.append(SearchResult( + title=r.get("title", ""), + url=r.get("href", ""), + snippet=r.get("body", ""), + source="DuckDuckGo" + )) + print(f"[WebSearch] duckduckgo-search 返回 {len(search_results)} 条结果") + return search_results + except ImportError: + print("[WebSearch] duckduckgo-search 未安装") except Exception as e: - print(f"[WebSearch] 备用方案也失败: {e}") - - # 方法3: 返回模拟数据作为最后兜底 + print(f"[WebSearch] duckduckgo-search 搜索失败: {e}") + + # 方式 3: 尝试用简单 HTTP 请求 + try: + return self._search_http(query, num_results) + except Exception as e: + print(f"[WebSearch] HTTP 搜索也失败: {e}") + + # 方式 4: 返回模拟数据作为最后兜底 return self._search_mock(query, num_results) + def _search_http(self, query: str, max_results: int) -> List[SearchResult]: + """用简单 HTTP 请求搜索(备用方案)- 尝试多个国内源""" + print(f"[WebSearch] 尝试 HTTP 搜索") + + # 方式 1: 尝试百度搜索(简单方式) + try: + return self._search_baidu(query, max_results) + except Exception as e: + print(f"[WebSearch] 百度搜索失败: {e}") + + # 方式 2: 返回模拟数据 + return self._search_mock(query, max_results) + + def _search_baidu(self, query: str, max_results: int) -> List[SearchResult]: + """尝试百度搜索""" + import requests + from urllib.parse import quote + + url = f"https://www.baidu.com/s?wd={quote(query)}" + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + } + + try: + response = requests.get(url, headers=headers, timeout=10) + response.raise_for_status() + + # 简单解析百度搜索结果(简化版) + results = [] + # 这里只是示意,真实百度搜索需要更复杂的解析 + results.append(SearchResult( + title=f"百度搜索: {query}", + url=url, + snippet="如需要真实搜索结果,请考虑使用百度搜索 API", + source="百度" + )) + return results + except Exception as e: + print(f"[WebSearch] 百度搜索也失败: {e}") + raise + def _search_mock(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]: """模拟搜索结果(兜底方案)""" print(f"[WebSearch] 使用模拟搜索结果 (查询: {query})") @@ -91,17 +146,17 @@ class WebSearchTool: mock_templates = [ { "title": f"关于「{query}」的相关介绍", - "snippet": "这是一个模拟的搜索结果。在实际部署中,需要确保网络连接正常,或者配置其他可用的搜索API。", + "snippet": "这是模拟结果。如需真实搜索,请检查容器网络连接或配置代理。", "url": "https://example.com/about" }, { "title": f"「{query}」 - 最新动态", - "snippet": "搜索功能暂时使用模拟数据。请检查容器网络配置,或联系管理员配置可用的搜索服务。", + "snippet": "提示:在容器内运行时,需要确保能访问外网。", "url": "https://example.com/latest" }, { "title": f"了解更多关于「{query}」的内容", - "snippet": "提示:如果需要真实的联网搜索,可以考虑使用 Bing Search API、Google Custom Search JSON API 或其他商用搜索服务。", + "snippet": "建议:检查 Docker 网络配置,或使用代理。", "url": "https://example.com/more" } ] diff --git a/backend/requirements.txt b/backend/requirements.txt index 1465c93..d36b0ec 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -40,7 +40,7 @@ rich==15.0.0 PyYAML==6.0.3 numpy>=1.26.2 pyjwt==2.8.0 -duckduckgo-search>=6.5.0 # 免费联网搜索 +ddgs>=6.0.0 # 免费联网搜索(原 duckduckgo-search 已重命名) matplotlib>=3.9.0 # 可视化图表 # Document Processing