- 将默认嵌入模型从 embedding-3 (2048维) 改为 embedding-2 (1024维),解决 Qdrant 维度不匹配问题
- 优化 web_search.py,增加详细日志输出和更好的错误处理
- 更新模拟搜索结果,增加提示信息
This commit is contained in:
@@ -43,8 +43,9 @@ DEEPSEEK_API_KEY = _get_str("DEEPSEEK_API_KEY")
|
|||||||
|
|
||||||
# ========== 智谱 API 配置 ==========
|
# ========== 智谱 API 配置 ==========
|
||||||
# 嵌入模型:根据 https://docs.bigmodel.cn/cn/guide/start/model-overview
|
# 嵌入模型:根据 https://docs.bigmodel.cn/cn/guide/start/model-overview
|
||||||
# 可选:embedding-2、embedding-3
|
# 可选:embedding-2 (1024维)、embedding-3 (2048维)
|
||||||
ZHIPU_EMBEDDING_MODEL = _get_str("ZHIPU_EMBEDDING_MODEL") or "embedding-3"
|
# 注意:如果 Qdrant collection 是1024维,请使用 embedding-2
|
||||||
|
ZHIPU_EMBEDDING_MODEL = _get_str("ZHIPU_EMBEDDING_MODEL") or "embedding-2"
|
||||||
# 重排模型:可选 rerank-1、rerank-2
|
# 重排模型:可选 rerank-1、rerank-2
|
||||||
ZHIPU_RERANK_MODEL = _get_str("ZHIPU_RERANK_MODEL") or "rerank-2"
|
ZHIPU_RERANK_MODEL = _get_str("ZHIPU_RERANK_MODEL") or "rerank-2"
|
||||||
ZHIPU_API_BASE = _get_str("ZHIPU_API_BASE") or "https://open.bigmodel.cn/api/paas/v4"
|
ZHIPU_API_BASE = _get_str("ZHIPU_API_BASE") or "https://open.bigmodel.cn/api/paas/v4"
|
||||||
|
|||||||
@@ -39,55 +39,85 @@ class WebSearchTool:
|
|||||||
Returns:
|
Returns:
|
||||||
搜索结果列表
|
搜索结果列表
|
||||||
"""
|
"""
|
||||||
|
num_results = max_results or self.max_results
|
||||||
|
|
||||||
|
# 方法1: 尝试使用 duckduckgo-search 库
|
||||||
try:
|
try:
|
||||||
from duckduckgo_search import DDGS
|
from duckduckgo_search import DDGS
|
||||||
|
|
||||||
num_results = max_results or self.max_results
|
print(f"[WebSearch] 使用 DuckDuckGo 搜索: {query}")
|
||||||
|
|
||||||
with DDGS() as ddgs:
|
with DDGS() as ddgs:
|
||||||
results = ddgs.text(query, max_results=num_results)
|
results = list(ddgs.text(query, max_results=num_results))
|
||||||
|
|
||||||
search_results = []
|
if results:
|
||||||
for r in results:
|
search_results = []
|
||||||
search_results.append(SearchResult(
|
for r in results:
|
||||||
title=r.get("title", ""),
|
search_results.append(SearchResult(
|
||||||
url=r.get("href", ""),
|
title=r.get("title", ""),
|
||||||
snippet=r.get("body", ""),
|
url=r.get("href", ""),
|
||||||
source="DuckDuckGo"
|
snippet=r.get("body", ""),
|
||||||
))
|
source="DuckDuckGo"
|
||||||
|
))
|
||||||
return search_results
|
|
||||||
|
|
||||||
|
print(f"[WebSearch] DuckDuckGo 返回 {len(search_results)} 条结果")
|
||||||
|
return search_results
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# 如果 duckduckgo-search 未安装,返回模拟数据
|
print("[WebSearch] duckduckgo-search 未安装,尝试备用方案")
|
||||||
return self._search_mock(query, max_results)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"搜索出错:{e}")
|
print(f"[WebSearch] DuckDuckGo 搜索失败: {e}")
|
||||||
# 出错时返回模拟数据
|
|
||||||
return self._search_mock(query, max_results)
|
# 方法2: 尝试使用 requests 直接调用简单搜索API
|
||||||
|
try:
|
||||||
|
import requests
|
||||||
|
|
||||||
|
print(f"[WebSearch] 使用备用搜索方案")
|
||||||
|
|
||||||
|
# 使用百度搜索的简易接口(仅作演示)
|
||||||
|
# 或者返回一些模拟的提示结果
|
||||||
|
return self._search_mock(query, num_results)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[WebSearch] 备用方案也失败: {e}")
|
||||||
|
|
||||||
|
# 方法3: 返回模拟数据作为最后兜底
|
||||||
|
return self._search_mock(query, num_results)
|
||||||
|
|
||||||
def _search_mock(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]:
|
def _search_mock(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]:
|
||||||
"""模拟搜索结果(兜底方案)"""
|
"""模拟搜索结果(兜底方案)"""
|
||||||
mock_results = [
|
print(f"[WebSearch] 使用模拟搜索结果 (查询: {query})")
|
||||||
SearchResult(
|
|
||||||
title=f"{query} - 搜索结果 1",
|
# 根据查询内容生成更有意义的模拟结果
|
||||||
url="https://example.com/result1",
|
mock_templates = [
|
||||||
snippet=f"这是关于 {query} 的模拟搜索结果,包含相关信息摘要...",
|
{
|
||||||
),
|
"title": f"关于「{query}」的相关介绍",
|
||||||
SearchResult(
|
"snippet": "这是一个模拟的搜索结果。在实际部署中,需要确保网络连接正常,或者配置其他可用的搜索API。",
|
||||||
title=f"{query} - 搜索结果 2",
|
"url": "https://example.com/about"
|
||||||
url="https://example.com/result2",
|
},
|
||||||
snippet=f"更多关于 {query} 的内容,涵盖多个方面和细节...",
|
{
|
||||||
),
|
"title": f"「{query}」 - 最新动态",
|
||||||
SearchResult(
|
"snippet": "搜索功能暂时使用模拟数据。请检查容器网络配置,或联系管理员配置可用的搜索服务。",
|
||||||
title=f"{query} - 搜索结果 3",
|
"url": "https://example.com/latest"
|
||||||
url="https://example.com/result3",
|
},
|
||||||
snippet=f"深入分析 {query} 的各个维度,提供全面的视角...",
|
{
|
||||||
),
|
"title": f"了解更多关于「{query}」的内容",
|
||||||
|
"snippet": "提示:如果需要真实的联网搜索,可以考虑使用 Bing Search API、Google Custom Search JSON API 或其他商用搜索服务。",
|
||||||
|
"url": "https://example.com/more"
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
num = max_results or self.max_results
|
num = max_results or self.max_results
|
||||||
return mock_results[:num]
|
results = []
|
||||||
|
|
||||||
|
for i, template in enumerate(mock_templates[:num]):
|
||||||
|
results.append(SearchResult(
|
||||||
|
title=template["title"],
|
||||||
|
url=template["url"],
|
||||||
|
snippet=template["snippet"],
|
||||||
|
source="模拟数据"
|
||||||
|
))
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
def format_search_results(self, results: List[SearchResult]) -> str:
|
def format_search_results(self, results: List[SearchResult]) -> str:
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user