This commit is contained in:
@@ -235,17 +235,15 @@ class ReactIntentReasoner:
|
|||||||
result.metadata["target_subgraph"] = subgraph_name
|
result.metadata["target_subgraph"] = subgraph_name
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# 2. 检查是否需要联网搜索
|
# 2. 检查是否需要联网搜索(谨慎触发)
|
||||||
web_search_keywords = ["最新", "今天", "近日", "热点", "新闻", "实时", "搜索", "网上", "互联网", "最新消息", "recent", "latest", "hot", "news", "search", "web"]
|
# 只有用户明确要求搜索才触发
|
||||||
|
web_search_keywords = ["搜索", "搜索一下", "帮我搜", "search for", "web search", "搜索资料"]
|
||||||
has_web_search = any(kw in query_lower for kw in web_search_keywords)
|
has_web_search = any(kw in query_lower for kw in web_search_keywords)
|
||||||
# 检查是否包含年份(比如2024、2025等),通常需要最新信息
|
|
||||||
import re
|
|
||||||
has_year = bool(re.search(r'202[3-9]|203[0-9]', query))
|
|
||||||
|
|
||||||
if has_web_search or has_year:
|
if has_web_search:
|
||||||
result.action = ReasoningAction.WEB_SEARCH
|
result.action = ReasoningAction.WEB_SEARCH
|
||||||
result.confidence = 0.85 if has_web_search else 0.7
|
result.confidence = 0.9
|
||||||
result.reasoning = "需要联网搜索最新信息"
|
result.reasoning = "用户明确要求联网搜索"
|
||||||
result.metadata["need_web_search"] = True
|
result.metadata["need_web_search"] = True
|
||||||
result.metadata["search_query"] = query
|
result.metadata["search_query"] = query
|
||||||
return result
|
return result
|
||||||
|
|||||||
@@ -6,6 +6,9 @@ Web Search Public Utility - Free, no API Key, using DuckDuckGo
|
|||||||
from typing import List, Dict, Any, Optional
|
from typing import List, Dict, Any, Optional
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
import requests
|
||||||
|
import warnings
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -30,7 +33,7 @@ class WebSearchTool:
|
|||||||
|
|
||||||
def search(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]:
|
def search(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]:
|
||||||
"""
|
"""
|
||||||
使用 DuckDuckGo 搜索
|
使用多种方式搜索
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query: 搜索关键词
|
query: 搜索关键词
|
||||||
@@ -40,16 +43,13 @@ class WebSearchTool:
|
|||||||
搜索结果列表
|
搜索结果列表
|
||||||
"""
|
"""
|
||||||
num_results = max_results or self.max_results
|
num_results = max_results or self.max_results
|
||||||
|
|
||||||
# 方法1: 尝试使用 duckduckgo-search 库
|
# 方式 1: 尝试用 ddgs 包
|
||||||
try:
|
try:
|
||||||
from duckduckgo_search import DDGS
|
from ddgs import DDGS
|
||||||
|
print(f"[WebSearch] 使用 ddgs 搜索: {query}")
|
||||||
print(f"[WebSearch] 使用 DuckDuckGo 搜索: {query}")
|
|
||||||
|
|
||||||
with DDGS() as ddgs:
|
with DDGS() as ddgs:
|
||||||
results = list(ddgs.text(query, max_results=num_results))
|
results = list(ddgs.text(query, max_results=num_results))
|
||||||
|
|
||||||
if results:
|
if results:
|
||||||
search_results = []
|
search_results = []
|
||||||
for r in results:
|
for r in results:
|
||||||
@@ -59,30 +59,85 @@ class WebSearchTool:
|
|||||||
snippet=r.get("body", ""),
|
snippet=r.get("body", ""),
|
||||||
source="DuckDuckGo"
|
source="DuckDuckGo"
|
||||||
))
|
))
|
||||||
|
print(f"[WebSearch] ddgs 返回 {len(search_results)} 条结果")
|
||||||
print(f"[WebSearch] DuckDuckGo 返回 {len(search_results)} 条结果")
|
|
||||||
return search_results
|
return search_results
|
||||||
except ImportError:
|
except ImportError:
|
||||||
print("[WebSearch] duckduckgo-search 未安装,尝试备用方案")
|
print("[WebSearch] ddgs 未安装,尝试 duckduckgo-search")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[WebSearch] DuckDuckGo 搜索失败: {e}")
|
print(f"[WebSearch] ddgs 搜索失败: {e}")
|
||||||
|
|
||||||
# 方法2: 尝试使用 requests 直接调用简单搜索API
|
# 方式 2: 尝试用旧的 duckduckgo-search 包
|
||||||
try:
|
try:
|
||||||
import requests
|
from duckduckgo_search import DDGS
|
||||||
|
print(f"[WebSearch] 使用 duckduckgo-search 搜索: {query}")
|
||||||
print(f"[WebSearch] 使用备用搜索方案")
|
with DDGS() as ddgs:
|
||||||
|
results = list(ddgs.text(query, max_results=num_results))
|
||||||
# 使用百度搜索的简易接口(仅作演示)
|
if results:
|
||||||
# 或者返回一些模拟的提示结果
|
search_results = []
|
||||||
return self._search_mock(query, num_results)
|
for r in results:
|
||||||
|
search_results.append(SearchResult(
|
||||||
|
title=r.get("title", ""),
|
||||||
|
url=r.get("href", ""),
|
||||||
|
snippet=r.get("body", ""),
|
||||||
|
source="DuckDuckGo"
|
||||||
|
))
|
||||||
|
print(f"[WebSearch] duckduckgo-search 返回 {len(search_results)} 条结果")
|
||||||
|
return search_results
|
||||||
|
except ImportError:
|
||||||
|
print("[WebSearch] duckduckgo-search 未安装")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[WebSearch] 备用方案也失败: {e}")
|
print(f"[WebSearch] duckduckgo-search 搜索失败: {e}")
|
||||||
|
|
||||||
# 方法3: 返回模拟数据作为最后兜底
|
# 方式 3: 尝试用简单 HTTP 请求
|
||||||
|
try:
|
||||||
|
return self._search_http(query, num_results)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[WebSearch] HTTP 搜索也失败: {e}")
|
||||||
|
|
||||||
|
# 方式 4: 返回模拟数据作为最后兜底
|
||||||
return self._search_mock(query, num_results)
|
return self._search_mock(query, num_results)
|
||||||
|
|
||||||
|
def _search_http(self, query: str, max_results: int) -> "List[SearchResult]":
    """Search over plain HTTP as a fallback, ending in mock results.

    The Baidu-based helper is tried first. If it raises, or hands back
    nothing, the mock generator is used instead, so a list is always
    returned and no exception escapes this method.

    Args:
        query: Search keywords.
        max_results: Result limit, forwarded unchanged to the helpers.

    Returns:
        The list produced by ``_search_baidu`` when it is non-empty,
        otherwise the list produced by ``_search_mock``.
    """
    print("[WebSearch] 尝试 HTTP 搜索")

    # Option 1: Baidu (simple approach).
    try:
        results = self._search_baidu(query, max_results)
    except Exception as e:
        # Deliberately broad: this is a best-effort fallback chain, so any
        # failure (network, HTTP status, missing dependency) moves on to
        # the mock data instead of propagating.
        print(f"[WebSearch] 百度搜索失败: {e}")
    else:
        # An empty list is no more useful than a failure; fall through.
        if results:
            return results

    # Option 2: mock data as the last resort.
    return self._search_mock(query, max_results)
|
||||||
|
|
||||||
|
def _search_baidu(self, query: str, max_results: int) -> List[SearchResult]:
    """Request the Baidu results page for ``query`` and return a placeholder.

    The page is fetched only to confirm that the request succeeds; its HTML
    is not parsed. On success a single ``SearchResult`` is returned that
    links to the results URL and carries a fixed hint snippet.

    Failures are not logged here. They propagate to the caller, and
    ``_search_http`` already prints the exception it catches, so logging
    in this method as well would report every failure twice.

    Args:
        query: Search keywords; URL-encoded into the ``wd`` parameter.
        max_results: Accepted for signature parity with the other search
            helpers. Unused, because exactly one placeholder is produced.

    Returns:
        A one-element list containing the placeholder result.

    Raises:
        ImportError: If the ``requests`` package is not installed.
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx status (via ``raise_for_status``).
    """
    # Imported locally so a missing ``requests`` only disables this fallback.
    import requests
    from urllib.parse import quote

    url = f"https://www.baidu.com/s?wd={quote(query)}"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    }

    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()

    # Illustrative only: real Baidu result extraction needs proper HTML
    # parsing, so a single pointer to the results page is returned instead.
    return [
        SearchResult(
            title=f"百度搜索: {query}",
            url=url,
            snippet="如需要真实搜索结果,请考虑使用百度搜索 API",
            source="百度",
        )
    ]
|
||||||
|
|
||||||
def _search_mock(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]:
|
def _search_mock(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]:
|
||||||
"""模拟搜索结果(兜底方案)"""
|
"""模拟搜索结果(兜底方案)"""
|
||||||
print(f"[WebSearch] 使用模拟搜索结果 (查询: {query})")
|
print(f"[WebSearch] 使用模拟搜索结果 (查询: {query})")
|
||||||
@@ -91,17 +146,17 @@ class WebSearchTool:
|
|||||||
mock_templates = [
|
mock_templates = [
|
||||||
{
|
{
|
||||||
"title": f"关于「{query}」的相关介绍",
|
"title": f"关于「{query}」的相关介绍",
|
||||||
"snippet": "这是一个模拟的搜索结果。在实际部署中,需要确保网络连接正常,或者配置其他可用的搜索API。",
|
"snippet": "这是模拟结果。如需真实搜索,请检查容器网络连接或配置代理。",
|
||||||
"url": "https://example.com/about"
|
"url": "https://example.com/about"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"title": f"「{query}」 - 最新动态",
|
"title": f"「{query}」 - 最新动态",
|
||||||
"snippet": "搜索功能暂时使用模拟数据。请检查容器网络配置,或联系管理员配置可用的搜索服务。",
|
"snippet": "提示:在容器内运行时,需要确保能访问外网。",
|
||||||
"url": "https://example.com/latest"
|
"url": "https://example.com/latest"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"title": f"了解更多关于「{query}」的内容",
|
"title": f"了解更多关于「{query}」的内容",
|
||||||
"snippet": "提示:如果需要真实的联网搜索,可以考虑使用 Bing Search API、Google Custom Search JSON API 或其他商用搜索服务。",
|
"snippet": "建议:检查 Docker 网络配置,或使用代理。",
|
||||||
"url": "https://example.com/more"
|
"url": "https://example.com/more"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ rich==15.0.0
|
|||||||
PyYAML==6.0.3
|
PyYAML==6.0.3
|
||||||
numpy>=1.26.2
|
numpy>=1.26.2
|
||||||
pyjwt==2.8.0
|
pyjwt==2.8.0
|
||||||
duckduckgo-search>=6.5.0 # 免费联网搜索
|
ddgs>=6.0.0 # 免费联网搜索(原 duckduckgo-search 已重命名)
|
||||||
matplotlib>=3.9.0 # 可视化图表
|
matplotlib>=3.9.0 # 可视化图表
|
||||||
|
|
||||||
# Document Processing
|
# Document Processing
|
||||||
|
|||||||
Reference in New Issue
Block a user