104 lines
3.3 KiB
Python
104 lines
3.3 KiB
Python
|
|
"""
|
|||
|
|
工具定义模块 - 纯函数工具,无依赖 AIAgent 类
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
# 标准库
|
|||
|
|
import os
|
|||
|
|
from pathlib import Path
|
|||
|
|
|
|||
|
|
# 第三方库
|
|||
|
|
import pandas as pd
|
|||
|
|
import pypdf
|
|||
|
|
import requests
|
|||
|
|
from bs4 import BeautifulSoup
|
|||
|
|
from langchain_core.tools import tool
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _file_allow_check(filename: str) -> Path:
    """Resolve *filename* inside ``./user_docs`` and verify it is safe to open.

    The allowed directory is created if it does not exist yet. The candidate
    path is fully resolved (``..`` segments and symlinks collapsed) before it
    is compared with the allowed directory, to block path traversal.

    Args:
        filename: User-supplied file name, interpreted relative to
            ``./user_docs``.

    Returns:
        The resolved absolute path of the requested file.

    Raises:
        ValueError: If the resolved path lies outside ``./user_docs``.
        FileNotFoundError: If the path is allowed but does not exist.
    """
    allowed_dir = Path("./user_docs").resolve()
    allowed_dir.mkdir(exist_ok=True)

    file_path = (allowed_dir / filename).resolve()
    # Compare path components, not raw string prefixes: a plain
    # ``startswith`` test would also accept sibling directories such as
    # ``./user_docs_backup`` because they share the same textual prefix.
    try:
        file_path.relative_to(allowed_dir)
    except ValueError:
        raise ValueError("错误:非法文件路径。") from None

    if not file_path.exists():
        raise FileNotFoundError(f"错误:文件 '{filename}' 不存在。")

    return file_path
|
|||
|
|
|
|||
|
|
|
|||
|
|
@tool
def get_current_temperature(location: str) -> str:
    """获取指定地点的当前温度。"""
    # The docstring above is what @tool exposes as the tool description, so
    # it is kept verbatim; maintainer notes are written as comments instead.
    #
    # Placeholder implementation: no weather service is queried, the reply
    # always reports 25℃ for whatever location is passed in.
    message = f'当前{location}的温度为25℃'
    return message
|
|||
|
|
|
|||
|
|
|
|||
|
|
@tool
def read_local_file(filename: str) -> str:
    """读取用户指定名称的本地文本文件内容并返回摘要。"""
    # The docstring above is what @tool exposes as the tool description, so
    # it is kept verbatim; maintainer notes are written as comments instead.
    #
    # Returns a preview (first 1000 characters) of a UTF-8 text file that
    # passes _file_allow_check, or an error message string. Never raises.
    try:
        file_path = _file_allow_check(filename)
        with open(file_path, 'r', encoding='utf-8') as f:
            # Only the preview is needed, so read at most 1000 characters
            # instead of loading an arbitrarily large file into memory.
            preview = f.read(1000)
        return f"文件 '{filename}' 的内容开头:\n{preview}..."
    except Exception as e:
        # Tool boundary: report any failure (bad path, missing file, decode
        # error) as text rather than letting the exception propagate.
        return f"读取文件时出错:{str(e)}"
|
|||
|
|
|
|||
|
|
|
|||
|
|
@tool
def read_pdf_summary(filename: str) -> str:
    """读取PDF文件并返回内容文本摘要。"""
    # The docstring above is what @tool exposes as the tool description, so
    # it is kept verbatim; maintainer notes are written as comments instead.
    #
    # Extracts the text of the first three pages of a PDF that passes
    # _file_allow_check and returns at most 2000 characters of it, or an
    # error message string. Never raises.
    try:
        pdf_path = _file_allow_check(filename)
        with open(pdf_path, 'rb') as stream:
            leading_pages = pypdf.PdfReader(stream).pages[:3]
            # Extraction must happen while the file is still open.
            text = "".join(page.extract_text() for page in leading_pages)
        return f"PDF文件 '{filename}' 的前几页内容:\n{text[:2000]}..."
    except Exception as e:
        # Tool boundary: failures are reported as text, not raised.
        return f"读取PDF出错:{e}"
|
|||
|
|
|
|||
|
|
|
|||
|
|
@tool
def read_excel_as_markdown(filename: str) -> str:
    """读取Excel文件,并将其主要数据转换为Markdown表格格式。"""
    # The docstring above is what @tool exposes as the tool description, so
    # it is kept verbatim; maintainer notes are written as comments instead.
    #
    # Loads an Excel file that passes _file_allow_check with pandas' default
    # read_excel settings and renders its first 10 rows as a Markdown table
    # without the index column. Returns an error message string on failure.
    try:
        preview = pd.read_excel(_file_allow_check(filename)).head(10)
        markdown_table = preview.to_markdown(index=False)
    except Exception as e:
        # Tool boundary: failures are reported as text, not raised.
        return f"读取Excel出错:{e}"
    return f"Excel文件 '{filename}' 的数据预览(前10行):\n{markdown_table}"
|
|||
|
|
|
|||
|
|
|
|||
|
|
@tool
def fetch_webpage_content(url: str) -> str:
    """抓取给定URL的网页正文内容,并返回清晰的纯文本。"""
    # The docstring above is what @tool exposes as the tool description, so
    # it is kept verbatim; maintainer notes are written as comments instead.
    #
    # Downloads the page (10 second timeout), strips <script>/<style>
    # elements, collapses the remaining text to non-empty chunks joined by
    # newlines and returns the first 1500 characters. Any failure is
    # returned as an error message string; the function never raises.
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        # When the Content-Type header carries no charset, requests falls
        # back to ISO-8859-1 for text/* bodies, which garbles UTF-8/GBK
        # pages. In that case hand the raw bytes to BeautifulSoup so it can
        # detect the encoding itself (including any <meta charset> tag).
        content_type = response.headers.get("Content-Type", "")
        has_declared_charset = "charset" in content_type.lower()
        markup = response.text if has_declared_charset else response.content
        soup = BeautifulSoup(markup, 'html.parser')

        # Script and style bodies are not page content; drop them entirely.
        for script in soup(["script", "style"]):
            script.decompose()

        text = soup.get_text()
        lines = (line.strip() for line in text.splitlines())
        # NOTE(review): splitting on a single space puts every word on its
        # own output line; confirm whether a wider separator was intended.
        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
        text = '\n'.join(chunk for chunk in chunks if chunk)
        return f"成功抓取网页 {url},正文内容开头:\n{text[:1500]}..."
    except Exception as e:
        # Tool boundary: network, HTTP status and parsing errors are all
        # reported as text rather than raised.
        return f"抓取网页时出错:{str(e)}"
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Tool registry and name lookup table (module-level constants).
AVAILABLE_TOOLS = [
    get_current_temperature,
    read_local_file,
    fetch_webpage_content,
    read_pdf_summary,
    read_excel_as_markdown,
]
# Maps each tool's ``name`` attribute to the tool object. The loop variable
# is deliberately not called ``tool`` so it cannot be confused with the
# imported ``tool`` decorator.
TOOLS_BY_NAME = {entry.name: entry for entry in AVAILABLE_TOOLS}
|