""" 工具定义模块 - 纯函数工具,无依赖 AIAgent 类 """ # 标准库 from pathlib import Path # 第三方库 import pandas as pd import pypdf import requests from bs4 import BeautifulSoup from langchain_core.tools import tool def _file_allow_check(filename: str) -> Path: """检查用户文件名是否位于允许目录 './user_docs' 下,防止路径遍历攻击。""" allowed_dir = Path("./user_docs").resolve() allowed_dir.mkdir(exist_ok=True) file_path = (allowed_dir / filename).resolve() if not str(file_path).startswith(str(allowed_dir)): raise ValueError("错误:非法文件路径。") if not file_path.exists(): raise FileNotFoundError(f"错误:文件 '{filename}' 不存在。") return file_path @tool def get_current_temperature(location: str) -> str: """获取指定地点的当前温度。""" return f'当前{location}的温度为25℃' @tool def read_local_file(filename: str) -> str: """读取用户指定名称的本地文本文件内容并返回摘要。""" try: file_path = _file_allow_check(filename) with open(file_path, 'r', encoding='utf-8') as f: content = f.read() return f"文件 '{filename}' 的内容开头:\n{content[:1000]}..." except Exception as e: return f"读取文件时出错:{str(e)}" @tool def read_pdf_summary(filename: str) -> str: """读取PDF文件并返回内容文本摘要。""" try: file_path = _file_allow_check(filename) text = "" with open(file_path, 'rb') as f: reader = pypdf.PdfReader(f) for page in reader.pages[:3]: text += page.extract_text() return f"PDF文件 '{filename}' 的前几页内容:\n{text[:2000]}..." except Exception as e: return f"读取PDF出错:{e}" @tool def read_excel_as_markdown(filename: str) -> str: """读取Excel文件,并将其主要数据转换为Markdown表格格式。""" try: file_path = _file_allow_check(filename) df = pd.read_excel(file_path) markdown_table = df.head(10).to_markdown(index=False) return f"Excel文件 '{filename}' 的数据预览(前10行):\n{markdown_table}" except Exception as e: return f"读取Excel出错:{e}" @tool def fetch_webpage_content(url: str) -> str: """抓取给定URL的网页正文内容,并返回清晰的纯文本。""" try: response = requests.get(url, timeout=10) response.raise_for_status() soup = BeautifulSoup(response.text, 'html.parser') for script in soup(["script", "style"]): script.decompose() text = soup.get_text() lines = (line.strip() for line in text.splitlines()) chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) text = '\n'.join(chunk for chunk in chunks if chunk) return f"成功抓取网页 {url},正文内容开头:\n{text[:1500]}..." except Exception as e: return f"抓取网页时出错:{str(e)}" # 工具列表和映射(全局常量) AVAILABLE_TOOLS = [ get_current_temperature, read_local_file, fetch_webpage_content, read_pdf_summary, read_excel_as_markdown ] TOOLS_BY_NAME = {tool.name: tool for tool in AVAILABLE_TOOLS}