Files
ailine/backend/app/agent_subgraphs/common/formatter.py
root 55c910bbe0
Some checks failed
构建并部署 AI Agent 服务 / deploy (push) Failing after 6m52s
cleanup: 删除旧的 rag/reranker.py
- rag/reranker.py 是旧代码,不再使用
- 现在使用 model_services/rerank_services.py 提供重排服务
- pipeline.py 已经通过 get_rerank_service() 使用服务层
2026-04-26 11:43:06 +08:00

486 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
格式化输出工具模块
提供基于 Jinja2 模板的 Markdown 格式化输出能力
功能:
1. TemplateManager - 模板管理器,支持加载和渲染 Jinja2 模板
2. MarkdownFormatter - Markdown 格式化工具,提供常用格式(表格、列表、引用等)
3. OutputRenderer - 输出渲染器,统一接口生成最终输出
4. PresetTemplates - 预置模板(对话摘要、报告、列表等)
"""
import os
from pathlib import Path
from typing import Dict, List, Any, Optional, Union
from dataclasses import dataclass
from abc import ABC, abstractmethod
# 尝试导入 Jinja2如果没有则提供基础实现
try:
from jinja2 import Template as JinjaTemplate, Environment, BaseLoader
HAS_JINJA2 = True
except ImportError:
HAS_JINJA2 = False
class BaseFormatter(ABC):
"""格式化器基类"""
@abstractmethod
def format(self, data: Any) -> str:
"""格式化数据为字符串"""
pass
class MarkdownFormatter(BaseFormatter):
"""Markdown 格式化工具"""
@staticmethod
def table(data: List[Dict[str, Any]], headers: Optional[List[str]] = None) -> str:
"""
生成 Markdown 表格
Args:
data: 数据列表,每个元素是一个字典
headers: 表头列表,如果为 None 则使用字典的键
Returns:
Markdown 表格字符串
"""
if not data:
return ""
if headers is None:
headers = list(data[0].keys()) if data else []
if not headers:
return ""
lines = []
# 表头行
header_line = "| " + " | ".join(str(h) for h in headers) + " |"
lines.append(header_line)
# 分隔线
separator_line = "| " + " | ".join("---" for _ in headers) + " |"
lines.append(separator_line)
# 数据行
for row in data:
row_values = [str(row.get(h, "")) for h in headers]
row_line = "| " + " | ".join(row_values) + " |"
lines.append(row_line)
return "\n".join(lines)
@staticmethod
def bullet_list(items: List[str], indent: int = 0) -> str:
"""
生成无序列表
Args:
items: 列表项
indent: 缩进层级
Returns:
Markdown 无序列表字符串
"""
indent_str = " " * indent
return "\n".join(f"{indent_str}- {item}" for item in items)
@staticmethod
def numbered_list(items: List[str], start: int = 1, indent: int = 0) -> str:
"""
生成有序列表
Args:
items: 列表项
start: 起始编号
indent: 缩进层级
Returns:
Markdown 有序列表字符串
"""
indent_str = " " * indent
return "\n".join(f"{indent_str}{i}. {item}" for i, item in enumerate(items, start=start))
@staticmethod
def quote(text: str, author: Optional[str] = None) -> str:
"""
生成引用块
Args:
text: 引用文本
author: 作者(可选)
Returns:
Markdown 引用块字符串
"""
quoted_lines = "\n".join(f"> {line}" for line in text.split("\n"))
if author:
quoted_lines += f"\n> — {author}"
return quoted_lines
@staticmethod
def code(code: str, language: str = "") -> str:
"""
生成代码块
Args:
code: 代码内容
language: 语言标识符
Returns:
Markdown 代码块字符串
"""
return f"```{language}\n{code}\n```"
@staticmethod
def heading(text: str, level: int = 1) -> str:
"""
生成标题
Args:
text: 标题文本
level: 标题级别1-6
Returns:
Markdown 标题字符串
"""
level = max(1, min(6, level))
return f"{'#' * level} {text}"
@staticmethod
def link(text: str, url: str) -> str:
"""
生成链接
Args:
text: 链接文本
url: 链接地址
Returns:
Markdown 链接字符串
"""
return f"[{text}]({url})"
@staticmethod
def bold(text: str) -> str:
"""生成粗体"""
return f"**{text}**"
@staticmethod
def italic(text: str) -> str:
"""生成斜体"""
return f"*{text}*"
@staticmethod
def divider() -> str:
"""生成分割线"""
return "---"
def format(self, data: Any) -> str:
"""实现基类方法,根据数据类型自动选择格式化方式"""
if isinstance(data, list):
if len(data) > 0 and isinstance(data[0], dict):
return self.table(data)
else:
return self.bullet_list([str(item) for item in data])
elif isinstance(data, dict):
return self.table([data])
else:
return str(data)
@dataclass
class Template:
"""模板数据类"""
name: str
content: str
description: str = ""
class DictLoader(BaseLoader):
"""字典模板加载器"""
用于从内存字典中加载模板
"""
def __init__(self, templates: Dict[str, str]):
self.templates = templates
def get_source(self, environment, template):
if template not in self.templates:
raise TemplateNotFound(template)
source = self.templates[template]
return source, None, lambda: True
class TemplateManager:
"""Jinja2 模板管理器"""
def __init__(self, template_dir: Optional[Path] = None):
"""
初始化模板管理器
Args:
template_dir: 模板目录路径
"""
self._templates: Dict[str, Template] = {}
self.template_dir = template_dir
self._env: Optional[Environment] = None
if HAS_JINJA2:
self._env = Environment(loader=DictLoader({}))
def _refresh_env(self) -> None:
"""刷新 Jinja2 环境"""
if HAS_JINJA2 and self._env is not None:
template_dict = {name: t.content for name, t in self._templates.items()}
self._env = Environment(loader=DictLoader(template_dict))
def add_template(self, name: str, content: str, description: str = "") -> None:
"""
添加模板
Args:
name: 模板名称
content: 模板内容
description: 模板描述
"""
self._templates[name] = Template(name=name, content=content, description=description)
self._refresh_env()
def load_template(self, name: str, file_path: Path) -> None:
"""
从文件加载模板
Args:
name: 模板名称
file_path: 模板文件路径
"""
if file_path.exists():
content = file_path.read_text(encoding='utf-8')
self.add_template(name, content, f"从文件加载: {file_path}")
def get_template(self, name: str) -> Optional[Template]:
"""
获取模板
Args:
name: 模板名称
Returns:
模板对象,如果不存在返回 None
"""
return self._templates.get(name)
def render(self, template_name: str, context: Dict[str, Any]) -> str:
"""
渲染模板
Args:
template_name: 模板名称
context: 渲染上下文
Returns:
渲染后的字符串
"""
template = self.get_template(template_name)
if template is None:
raise ValueError(f"模板不存在: {template_name}")
return self.render_string(template.content, context)
def render_string(self, template_string: str, context: Dict[str, Any]) -> str:
"""
渲染模板字符串
Args:
template_string: 模板字符串
context: 渲染上下文
Returns:
渲染后的字符串
"""
if HAS_JINJA2 and self._env is not None:
try:
jinja_template = self._env.from_string(template_string)
return jinja_template.render(**context)
except Exception:
# 如果 Jinja2 渲染失败,使用简单替换
pass
# 简单的字符串替换作为备选方案
result = template_string
for key, value in context.items():
result = result.replace(f"{{{{{key}}}}}", str(value))
result = result.replace(f"{{{{ {key} }}}}", str(value))
return result
class PresetTemplates:
"""预置模板集合"""
@staticmethod
def conversation_summary() -> str:
"""对话摘要模板"""
return """# 对话摘要
**时间**: {{ timestamp }}
**参与者**: {{ participants }}
---
## 对话要点
{{ bullet_list(points) }}
---
## 总结
{{ summary }}
"""
@staticmethod
def research_report() -> str:
"""研究报告模板"""
return """# {{ title }}
**日期**: {{ date }}
**作者**: {{ author }}
---
## 摘要
{{ summary }}
---
## 发现
{{ bullet_list(findings) }}
---
## 数据来源
{{ sources }}
"""
@staticmethod
def task_list() -> str:
"""任务列表模板"""
return """# 任务列表
**更新时间**: {{ update_time }}
---
## 待办
{{ numbered_list(todos) }}
---
## 已完成
{{ numbered_list(completed) }}
"""
@staticmethod
def data_summary() -> str:
"""数据摘要模板"""
return """# 数据摘要
**生成时间**: {{ timestamp }}
---
## 数据概览
{{ table(data_overview) }}
---
## 关键指标
{{ bullet_list(metrics) }}
"""
class OutputRenderer:
"""输出渲染器"""
def __init__(self, template_manager: Optional[TemplateManager] = None):
"""
初始化输出渲染器
Args:
template_manager: 模板管理器
"""
self.template_manager = template_manager or TemplateManager()
self.markdown = MarkdownFormatter()
# 自动注册预置模板
self._register_presets()
def _register_presets(self) -> None:
"""注册预置模板"""
self.template_manager.add_template(
"conversation_summary",
PresetTemplates.conversation_summary(),
"对话摘要模板"
)
self.template_manager.add_template(
"research_report",
PresetTemplates.research_report(),
"研究报告模板"
)
self.template_manager.add_template(
"task_list",
PresetTemplates.task_list(),
"任务列表模板"
)
self.template_manager.add_template(
"data_summary",
PresetTemplates.data_summary(),
"数据摘要模板"
)
def render(self, template_name: str, context: Dict[str, Any]) -> str:
"""
使用模板渲染输出
Args:
template_name: 模板名称
context: 渲染上下文
Returns:
渲染后的字符串
"""
# 将格式化工具注入上下文
render_context = context.copy()
render_context["bullet_list"] = self.markdown.bullet_list
render_context["numbered_list"] = self.markdown.numbered_list
render_context["table"] = self.markdown.table
render_context["quote"] = self.markdown.quote
render_context["code"] = self.markdown.code
render_context["heading"] = self.markdown.heading
render_context["link"] = self.markdown.link
render_context["bold"] = self.markdown.bold
render_context["italic"] = self.markdown.italic
render_context["divider"] = self.markdown.divider
return self.template_manager.render(template_name, render_context)
def render_plain(self, data: Any) -> str:
"""
直接格式化数据为 Markdown
Args:
data: 数据
Returns:
格式化后的字符串
"""
return self.markdown.format(data)
"""
格式化后的字符串
"""
return str(data)