From a362459d1fb2e2f373e3757854f6bc4a7283cc1a Mon Sep 17 00:00:00 2001 From: root <953994191@qq.com> Date: Thu, 30 Apr 2026 22:06:01 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E5=B5=8C=E5=85=A5?= =?UTF-8?q?=E7=BB=B4=E5=BA=A6=E4=B8=8D=E5=8C=B9=E9=85=8D=E5=92=8CWeb?= =?UTF-8?q?=E6=90=9C=E7=B4=A2=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 将默认嵌入模型从 embedding-3 (2048维) 改为 embedding-2 (1024维),解决Qdrant维度不匹配问题 - 优化web_search.py,增加详细日志输出和更好的错误处理 - 更新模拟搜索结果,增加提示信息 --- backend/app/config.py | 5 +- backend/app/core/web_search.py | 102 +++++++++++++++++++++------------ 2 files changed, 69 insertions(+), 38 deletions(-) diff --git a/backend/app/config.py b/backend/app/config.py index 5eebd4e..986a05d 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -43,8 +43,9 @@ DEEPSEEK_API_KEY = _get_str("DEEPSEEK_API_KEY") # ========== 智谱 API 配置 ========== # 嵌入模型:根据 https://docs.bigmodel.cn/cn/guide/start/model-overview -# 可选:embedding-2、embedding-3 -ZHIPU_EMBEDDING_MODEL = _get_str("ZHIPU_EMBEDDING_MODEL") or "embedding-3" +# 可选:embedding-2 (1024维)、embedding-3 (2048维) +# 注意:如果 Qdrant collection 是1024维,请使用 embedding-2 +ZHIPU_EMBEDDING_MODEL = _get_str("ZHIPU_EMBEDDING_MODEL") or "embedding-2" # 重排模型:可选 rerank-1、rerank-2 ZHIPU_RERANK_MODEL = _get_str("ZHIPU_RERANK_MODEL") or "rerank-2" ZHIPU_API_BASE = _get_str("ZHIPU_API_BASE") or "https://open.bigmodel.cn/api/paas/v4" diff --git a/backend/app/core/web_search.py b/backend/app/core/web_search.py index 55f0237..071a60e 100644 --- a/backend/app/core/web_search.py +++ b/backend/app/core/web_search.py @@ -35,59 +35,89 @@ class WebSearchTool: Args: query: 搜索关键词 max_results: 返回结果数量,默认使用初始化时的设置 - + Returns: 搜索结果列表 """ + num_results = max_results or self.max_results + + # 方法1: 尝试使用 duckduckgo-search 库 try: from duckduckgo_search import DDGS - num_results = max_results or self.max_results + print(f"[WebSearch] 使用 DuckDuckGo 搜索: {query}") with DDGS() as ddgs: - results = ddgs.text(query, max_results=num_results) - - search_results = [] - for r in results: - search_results.append(SearchResult( - title=r.get("title", ""), - url=r.get("href", ""), - snippet=r.get("body", ""), - source="DuckDuckGo" - )) - - return search_results + results = list(ddgs.text(query, max_results=num_results)) + if results: + search_results = [] + for r in results: + search_results.append(SearchResult( + title=r.get("title", ""), + url=r.get("href", ""), + snippet=r.get("body", ""), + source="DuckDuckGo" + )) + + print(f"[WebSearch] DuckDuckGo 返回 {len(search_results)} 条结果") + return search_results except ImportError: - # 如果 duckduckgo-search 未安装,返回模拟数据 - return self._search_mock(query, max_results) + print("[WebSearch] duckduckgo-search 未安装,尝试备用方案") except Exception as e: - print(f"搜索出错:{e}") - # 出错时返回模拟数据 - return self._search_mock(query, max_results) + print(f"[WebSearch] DuckDuckGo 搜索失败: {e}") + + # 方法2: 尝试使用 requests 直接调用简单搜索API + try: + import requests + + print(f"[WebSearch] 使用备用搜索方案") + + # 使用百度搜索的简易接口(仅作演示) + # 或者返回一些模拟的提示结果 + return self._search_mock(query, num_results) + + except Exception as e: + print(f"[WebSearch] 备用方案也失败: {e}") + + # 方法3: 返回模拟数据作为最后兜底 + return self._search_mock(query, num_results) def _search_mock(self, query: str, max_results: Optional[int] = None) -> List[SearchResult]: """模拟搜索结果(兜底方案)""" - mock_results = [ - SearchResult( - title=f"{query} - 搜索结果 1", - url="https://example.com/result1", - snippet=f"这是关于 {query} 的模拟搜索结果,包含相关信息摘要...", - ), - SearchResult( - title=f"{query} - 搜索结果 2", - url="https://example.com/result2", - snippet=f"更多关于 {query} 的内容,涵盖多个方面和细节...", - ), - SearchResult( - title=f"{query} - 搜索结果 3", - url="https://example.com/result3", - snippet=f"深入分析 {query} 的各个维度,提供全面的视角...", - ), + print(f"[WebSearch] 使用模拟搜索结果 (查询: {query})") + + # 根据查询内容生成更有意义的模拟结果 + mock_templates = [ + { + "title": f"关于「{query}」的相关介绍", + "snippet": "这是一个模拟的搜索结果。在实际部署中,需要确保网络连接正常,或者配置其他可用的搜索API。", + "url": "https://example.com/about" + }, + { + "title": f"「{query}」 - 最新动态", + "snippet": "搜索功能暂时使用模拟数据。请检查容器网络配置,或联系管理员配置可用的搜索服务。", + "url": "https://example.com/latest" + }, + { + "title": f"了解更多关于「{query}」的内容", + "snippet": "提示:如果需要真实的联网搜索,可以考虑使用 Bing Search API、Google Custom Search JSON API 或其他商用搜索服务。", + "url": "https://example.com/more" + } ] num = max_results or self.max_results - return mock_results[:num] + results = [] + + for i, template in enumerate(mock_templates[:num]): + results.append(SearchResult( + title=template["title"], + url=template["url"], + snippet=template["snippet"], + source="模拟数据" + )) + + return results def format_search_results(self, results: List[SearchResult]) -> str: """