实现前后端分离的agent

2026-04-13 19:49:18 +08:00
parent 09a5440045
commit 4385fabc22
13 changed files with 1317 additions and 188 deletions
--- a/tools.py
+++ b/tools.py
@@ -0,0 +1,103 @@
+"""
+Tool definitions module - standalone tool functions that do not depend on the AIAgent class.
+"""
+
+# 标准库
+import os
+from pathlib import Path
+
+# 第三方库
+import pandas as pd
+import pypdf
+import requests
+from bs4 import BeautifulSoup
+from langchain_core.tools import tool
+
+
+def _file_allow_check(filename: str) -> Path:
+    """Resolve a user-supplied file name inside ``./user_docs``.
+
+    The allowed directory is created if it does not exist yet. The candidate
+    path is fully resolved (``..`` segments and symlinks included) before it
+    is checked, so the containment test sees the real target.
+
+    Args:
+        filename: File name or relative path supplied by the caller.
+
+    Returns:
+        The resolved absolute path of the existing entry.
+
+    Raises:
+        ValueError: If the resolved path lies outside ``./user_docs``.
+        FileNotFoundError: If nothing exists at the resolved path.
+    """
+    allowed_dir = Path("./user_docs").resolve()
+    allowed_dir.mkdir(exist_ok=True)
+
+    file_path = (allowed_dir / filename).resolve()
+    # Compare path components, not string prefixes: a plain startswith()
+    # test would also accept sibling directories such as
+    # "./user_docs_backup/x", defeating the traversal guard.
+    try:
+        file_path.relative_to(allowed_dir)
+    except ValueError:
+        raise ValueError("错误：非法文件路径。") from None
+
+    if not file_path.exists():
+        raise FileNotFoundError(f"错误：文件 '{filename}' 不存在。")
+
+    return file_path
+
+
+@tool
+def get_current_temperature(location: str) -> str:
+    """获取指定地点的当前温度。"""
+    # Placeholder implementation: the reading is a fixed 25℃ and only the
+    # location name is interpolated into the reply. The docstring above is
+    # kept verbatim because it is the text handed to the @tool decorator.
+    reading = f'当前{location}的温度为25℃'
+    return reading
+
+
+@tool
+def read_local_file(filename: str) -> str:
+    """读取用户指定名称的本地文本文件内容并返回摘要。"""
+    # Returns the first 1000 characters of a UTF-8 text file located under
+    # ./user_docs (validated by _file_allow_check), or an error message
+    # string when validation or reading fails. The docstring above is kept
+    # verbatim because it is the text handed to the @tool decorator.
+    try:
+        file_path = _file_allow_check(filename)
+        with open(file_path, 'r', encoding='utf-8') as f:
+            # Only the preview is used, so read at most 1000 characters
+            # instead of loading the entire file into memory.
+            content = f.read(1000)
+        return f"文件 '{filename}' 的内容开头：\n{content}..."
+    except Exception as e:
+        # Every failure is turned into a returned message instead of
+        # propagating out of the tool.
+        return f"读取文件时出错：{str(e)}"
+
+
+@tool
+def read_pdf_summary(filename: str) -> str:
+    """读取PDF文件并返回内容文本摘要。"""
+    # Extracts text from at most the first three pages of a PDF located
+    # under ./user_docs and returns the first 2000 characters of it, or an
+    # error message string when validation or parsing fails. The docstring
+    # above is kept verbatim because it is the text handed to @tool.
+    try:
+        file_path = _file_allow_check(filename)
+        with open(file_path, 'rb') as f:
+            reader = pypdf.PdfReader(f)
+            # Build the text with one join instead of growing a string page
+            # by page. The `or ""` keeps a single text-less page from
+            # aborting the whole preview should extract_text() ever yield
+            # None - TODO confirm against the installed pypdf version.
+            text = "".join(
+                page.extract_text() or "" for page in reader.pages[:3]
+            )
+        return f"PDF文件 '{filename}' 的前几页内容：\n{text[:2000]}..."
+    except Exception as e:
+        # Every failure is turned into a returned message instead of
+        # propagating out of the tool.
+        return f"读取PDF出错：{e}"
+
+
+@tool
+def read_excel_as_markdown(filename: str) -> str:
+    """读取Excel文件，并将其主要数据转换为Markdown表格格式。"""
+    # Loads an Excel file located under ./user_docs with pandas and renders
+    # its first 10 rows as a Markdown table without the index column.
+    # Failures are returned as an error message string. The docstring above
+    # is kept verbatim because it is the text handed to @tool.
+    try:
+        workbook_path = _file_allow_check(filename)
+        frame = pd.read_excel(workbook_path)
+        preview_rows = frame.head(10)
+        rendered = preview_rows.to_markdown(index=False)
+    except Exception as err:
+        return f"读取Excel出错：{err}"
+    return f"Excel文件 '{filename}' 的数据预览（前10行）：\n{rendered}"
+
+
+@tool
+def fetch_webpage_content(url: str) -> str:
+    """抓取给定URL的网页正文内容，并返回清晰的纯文本。"""
+    # Downloads the page (10 s timeout), drops <script>/<style> elements,
+    # collapses the remaining text into non-empty lines and returns the
+    # first 1500 characters, or an error message string on any failure.
+    # The docstring above is kept verbatim because it is the text handed
+    # to @tool.
+    #
+    # NOTE(review): the URL is requested exactly as given. If tool
+    # arguments can be influenced by untrusted input, this allows requests
+    # to internal addresses (SSRF) - consider restricting schemes/hosts.
+    try:
+        response = requests.get(url, timeout=10)
+        response.raise_for_status()
+        # When Content-Type carries no charset, requests decodes text/*
+        # bodies as ISO-8859-1, which garbles UTF-8/GBK pages; use the
+        # encoding detected from the body in that case.
+        if "charset" not in response.headers.get("Content-Type", "").lower():
+            response.encoding = response.apparent_encoding
+        soup = BeautifulSoup(response.text, 'html.parser')
+        for element in soup(["script", "style"]):
+            element.decompose()
+        text = soup.get_text()
+        lines = (line.strip() for line in text.splitlines())
+        # Double spaces are treated as separators between phrases.
+        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
+        text = '\n'.join(chunk for chunk in chunks if chunk)
+        return f"成功抓取网页 {url}，正文内容开头：\n{text[:1500]}..."
+    except Exception as e:
+        # Every failure is turned into a returned message instead of
+        # propagating out of the tool.
+        return f"抓取网页时出错：{str(e)}"
+
+
+# Tool registry (module-level constants): the ordered list of tools and a
+# name -> tool lookup built from each tool's ``name`` attribute.
+AVAILABLE_TOOLS = [
+    get_current_temperature,
+    read_local_file,
+    fetch_webpage_content,
+    read_pdf_summary,
+    read_excel_as_markdown,
+]
+# The loop variable is deliberately not called ``tool`` so it cannot be
+# confused with the imported decorator of that name.
+TOOLS_BY_NAME = {registered.name: registered for registered in AVAILABLE_TOOLS}