This commit is contained in:
46
backend/app/subgraphs/news_analysis/__init__.py
Normal file
46
backend/app/subgraphs/news_analysis/__init__.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""
|
||||
资讯子图 - 完善版
|
||||
News Analysis Subgraph Module - Complete
|
||||
"""
|
||||
|
||||
from .state import (
|
||||
NewsAnalysisState,
|
||||
NewsAction,
|
||||
NewsItem,
|
||||
NewsSource
|
||||
)
|
||||
from .graph import build_news_analysis_subgraph
|
||||
from .nodes import (
|
||||
parse_intent,
|
||||
query_news,
|
||||
analyze_url,
|
||||
extract_keywords,
|
||||
generate_report,
|
||||
format_result,
|
||||
should_continue
|
||||
)
|
||||
from .api_client import news_api, NewsAPIClient
|
||||
|
||||
# Public re-export list for the news_analysis subgraph package.
__all__ = [
    # State
    "NewsAnalysisState",
    "NewsAction",
    "NewsItem",
    "NewsSource",

    # Graph
    "build_news_analysis_subgraph",

    # Nodes
    "parse_intent",
    "query_news",
    "analyze_url",
    "extract_keywords",
    "generate_report",
    "format_result",
    "should_continue",

    # API
    "news_api",
    "NewsAPIClient"
]
|
||||
196
backend/app/subgraphs/news_analysis/api_client.py
Normal file
196
backend/app/subgraphs/news_analysis/api_client.py
Normal file
@@ -0,0 +1,196 @@
|
||||
"""
|
||||
资讯子图API调用工具
|
||||
News Analysis API Client
|
||||
支持 async 和真实数据库缓存
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, Optional, List
|
||||
import random
|
||||
from datetime import datetime
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class NewsAPIClient:
    """
    News API client, extensible over multiple upstream APIs (NewsAPI,
    Toutiao, Baidu News, ...) with an optional database cache.

    The ``*_mock`` methods return canned demo data. ``query_news`` is the
    unified async entry point: cache first, then (future) real APIs, then
    mock fallback. ``analyze_url`` / ``extract_keywords`` /
    ``generate_report`` are thin sync aliases for the mock implementations,
    added because the subgraph nodes call the client under those names.
    """

    # API key for a real news provider; unused by the mock paths.
    newsapi_key: Optional[str] = None

    # Optional repository used to cache news in a database.
    # NOTE(review): expected to expose async search_by_keywords()/insert() —
    # confirm against the actual repository implementation.
    news_repository: Optional[Any] = None

    async def query_news_db(self, user_id: str, keyword: str) -> Optional[List[Dict[str, Any]]]:
        """Look up cached news for *user_id*/*keyword*; None on miss or error."""
        if not self.news_repository:
            return None
        try:
            entities = await self.news_repository.search_by_keywords(user_id, keyword)
            if entities:
                return [
                    {
                        "title": e.title,
                        "source": e.source,
                        "summary": e.content,
                        "keywords": e.keywords.split(",") if e.keywords else [],
                        "author": "",
                        "published_at": e.created_at,
                    }
                    for e in entities
                ]
        except Exception as e:
            # Best-effort cache: a lookup failure degrades to "no cache hit".
            print(f"从数据库查询新闻失败:{e}")
        return None

    async def cache_news_db(self, user_id: str, news: Dict[str, Any]):
        """Persist one news dict to the database cache (no-op without a repository)."""
        if not self.news_repository:
            return
        try:
            from ...db.models import NewsEntity
            entity = NewsEntity(
                user_id=user_id,
                title=news.get("title", ""),
                content=news.get("summary", ""),
                url=news.get("url", ""),
                source=news.get("source", ""),
                keywords=",".join(news.get("keywords", [])),
            )
            await self.news_repository.insert(entity)
        except Exception as e:
            # Best-effort cache: failing to write the cache is non-fatal.
            print(f"缓存新闻到数据库失败:{e}")

    def query_news_mock(self, query: str) -> List[Dict[str, Any]]:
        """
        Return demo news items whose title/summary/keywords match *query*.

        Falls back to the first two canned items when nothing matches, so
        callers always get something to display.
        """
        # Canned demo "database" of news items.
        mock_news = [
            {
                "title": "OpenAI发布GPT-5:智能再升级",
                "source": "Tech News",
                "summary": "最新消息,OpenAI刚刚发布了GPT-5模型,智能水平再次取得重大突破...",
                "keywords": ["AI", "GPT-5", "OpenAI"],
                "author": "AI Team",
                "published_at": datetime.now().isoformat(),
            },
            {
                "title": "大模型在医疗领域的应用",
                "source": "Health Tech",
                "summary": "大模型AI技术正在医疗领域展现巨大潜力,从辅助诊断到药物研发...",
                "keywords": ["医疗", "大模型", "应用"],
                "author": "Medical Team",
                "published_at": datetime.now().isoformat(),
            },
            {
                "title": "2026年AI行业发展趋势报告",
                "source": "Business Daily",
                "summary": "最新行业报告显示,AI行业将继续保持高速增长,企业数字化转型加速...",
                "keywords": ["趋势", "AI", "商业"],
                "author": "Business Team",
                "published_at": datetime.now().isoformat(),
            },
        ]

        # Simple substring filter against title, summary and keywords.
        query_lower = query.lower()
        results = [
            news
            for news in mock_news
            if query_lower in news["title"].lower()
            or query_lower in news["summary"].lower()
            or any(keyword.lower() in query_lower for keyword in news["keywords"])
        ]

        # Nothing matched: fall back to the first two items.
        return results or mock_news[:2]

    def analyze_url_mock(self, url: str) -> Dict[str, Any]:
        """Return a canned analysis result for *url* (demo only)."""
        return {
            "title": f"分析结果:{url}",
            "source": "URL Analyzer",
            "summary": "已完成对该URL的内容分析,包含文章摘要和情感倾向判断...",
            "keywords": ["News", "Analysis", url.split("/")[-1] if url else "unknown"],
        }

    def extract_keywords_mock(self, text: str) -> List[str]:
        """Return the known demo keywords that occur in *text* (defaults when none)."""
        common_keywords = ["AI", "大模型", "应用场景", "行业趋势", "创新", "技术"]
        text_lower = text.lower()
        result = [keyword for keyword in common_keywords if keyword.lower() in text_lower]

        # No hits: fall back to a default keyword set.
        return result or ["AI", "大模型", "应用场景", "行业趋势"]

    def generate_report_mock(self, query: str) -> str:
        """Return a canned Markdown-ish analysis report for *query* (demo only)."""
        report = f"""═══════════════════════════════════════════
📊 资讯分析报告
═══════════════════════════════════════════

主题:{query}

📋 摘要:
这是一份关于 {query} 的资讯分析综合报告,包含最新行业动态和趋势分析。

🔍 主要发现:
1. AI技术持续快速发展
2. 大模型应用场景不断拓展
3. 行业数字化转型加速

🏷️ 关键词:
- AI
- 大模型
- 数字化转型
- 创新

═══════════════════════════════════════════
💡 建议:继续关注行业动态,把握发展机遇!
"""
        return report

    # ---- sync compatibility aliases ------------------------------------
    # The subgraph nodes call the client under these names; previously they
    # did not exist and the calls raised AttributeError.

    def analyze_url(self, url: str) -> Dict[str, Any]:
        """Sync alias for analyze_url_mock."""
        return self.analyze_url_mock(url)

    def extract_keywords(self, text: str) -> List[str]:
        """Sync alias for extract_keywords_mock."""
        return self.extract_keywords_mock(text)

    def generate_report(self, query: str) -> str:
        """Sync alias for generate_report_mock."""
        return self.generate_report_mock(query)

    # ========== unified entry point (cache-first) ==========
    async def query_news(self, user_id: str = "default", query: str = "", use_cache: bool = True) -> List[Dict[str, Any]]:
        """
        Query news with cache-first semantics.

        1. database cache (when *use_cache*), 2. third-party APIs (not yet
        implemented), 3. mock data as the final fallback — mock results are
        written back to the cache when *use_cache* is set.
        """
        if use_cache:
            cached = await self.query_news_db(user_id, query)
            if cached:
                return cached

        # TODO: real third-party API lookup goes here; cache hits via
        # cache_news_db before returning.

        mock_result = self.query_news_mock(query)
        if use_cache:
            for news in mock_result:
                await self.cache_news_db(user_id, news)
        return mock_result
|
||||
|
||||
|
||||
# Module-level singleton (mock mode; kept for backward compatibility)
news_api = NewsAPIClient()
|
||||
63
backend/app/subgraphs/news_analysis/graph.py
Normal file
63
backend/app/subgraphs/news_analysis/graph.py
Normal file
@@ -0,0 +1,63 @@
|
||||
"""
|
||||
资讯子图构建器
|
||||
News Analysis Subgraph Builder
|
||||
"""
|
||||
|
||||
from app.main_graph.graph import StateGraph, START, END
|
||||
|
||||
from .state import NewsAnalysisState
|
||||
from .nodes import (
|
||||
parse_intent,
|
||||
query_news,
|
||||
analyze_url,
|
||||
extract_keywords,
|
||||
generate_report,
|
||||
format_result,
|
||||
should_continue
|
||||
)
|
||||
|
||||
|
||||
def build_news_analysis_subgraph() -> StateGraph:
    """
    Build the news-analysis subgraph.

    Fix: should_continue's fallback branch returns "format_result", but the
    conditional-edge mapping had no such key, which would fail at routing
    time; the mapping now covers it.

    Returns:
        The configured StateGraph.
    """
    graph = StateGraph(NewsAnalysisState)

    # Nodes
    graph.add_node("parse_intent", parse_intent)
    graph.add_node("query_news", query_news)
    graph.add_node("analyze_url", analyze_url)
    graph.add_node("extract_keywords", extract_keywords)
    graph.add_node("generate_report", generate_report)
    graph.add_node("format_result", format_result)

    # Entry edge
    graph.add_edge(START, "parse_intent")

    # Route from parse_intent according to the parsed action
    graph.add_conditional_edges(
        "parse_intent",
        should_continue,
        {
            "query_news": "query_news",
            "analyze_url": "analyze_url",
            "extract_keywords": "extract_keywords",
            "generate_report": "generate_report",
            # fallback: should_continue may route straight to formatting
            "format_result": "format_result",
        }
    )

    # Every action node funnels into format_result
    graph.add_edge("query_news", "format_result")
    graph.add_edge("analyze_url", "format_result")
    graph.add_edge("extract_keywords", "format_result")
    graph.add_edge("generate_report", "format_result")

    # Then finish
    graph.add_edge("format_result", END)

    return graph
|
||||
185
backend/app/subgraphs/news_analysis/nodes.py
Normal file
185
backend/app/subgraphs/news_analysis/nodes.py
Normal file
@@ -0,0 +1,185 @@
|
||||
"""
|
||||
资讯子图节点 - 使用公共工具版本
|
||||
News Analysis Subgraph Nodes - Using Common Tools
|
||||
"""
|
||||
|
||||
from typing import Dict, Any
|
||||
from datetime import datetime
|
||||
|
||||
# 公共工具
|
||||
from ..common import MarkdownFormatter
|
||||
|
||||
from .state import (
|
||||
NewsAnalysisState,
|
||||
NewsAction,
|
||||
NewsItem,
|
||||
NewsSource
|
||||
)
|
||||
from .api_client import news_api
|
||||
|
||||
|
||||
def parse_intent(state: NewsAnalysisState) -> NewsAnalysisState:
    """
    Parse the user's intent from the raw query.

    Scans the lowercased query against ordered keyword rules and stores the
    first matching action on the state; defaults to QUERY_NEWS.
    """
    text = state.user_query.lower()

    # Ordered rules: first rule whose keywords appear in the query wins.
    rules = [
        (NewsAction.QUERY_NEWS, ("资讯", "新闻", "news", "report")),
        (NewsAction.ANALYZE_URL, ("分析", "analyze", "url", "链接")),
        (NewsAction.EXTRACT_KEYWORDS, ("关键词", "keyword", "提取")),
        (NewsAction.GENERATE_REPORT, ("报告", "生成", "generate")),
    ]
    state.action = next(
        (action for action, keywords in rules if any(k in text for k in keywords)),
        NewsAction.QUERY_NEWS,
    )

    return state
|
||||
|
||||
|
||||
def query_news(state: NewsAnalysisState) -> NewsAnalysisState:
    """
    Query news for the user's query and store them on the state.

    Fix: the previous version called the async ``news_api.query_news(...)``
    without awaiting it (and passed the query text as ``user_id``), so a
    coroutine object ended up in ``state.news_items``. This node is sync, so
    it now uses the sync mock lookup and builds NewsItem objects to match
    the declared ``List[NewsItem]`` state type.
    """
    state.current_phase = "executing"

    raw_items = news_api.query_news_mock(state.user_query)
    state.news_items = [
        NewsItem(
            title=item.get("title", ""),
            url=item.get("url", ""),
            source=item.get("source", ""),
            author=item.get("author", ""),
            published_at=item.get("published_at"),
            summary=item.get("summary", ""),
            keywords=list(item.get("keywords", [])),
        )
        for item in raw_items
    ]

    return state
|
||||
|
||||
|
||||
def analyze_url(state: NewsAnalysisState) -> NewsAnalysisState:
    """
    Analyze a URL mentioned in the user query.

    Fix: the client exposes ``analyze_url_mock``; the previous call to a
    nonexistent ``news_api.analyze_url`` raised AttributeError.
    """
    state.current_phase = "executing"

    # Crude URL extraction: strip the intent keywords from the query.
    url = state.user_query
    for keyword in ["分析", "analyze", "url", "链接"]:
        url = url.replace(keyword, "").strip()

    # Fall back to a demo URL when nothing is left after stripping.
    if not url:
        url = "https://example.com/news/article"

    state.custom_urls = [url]

    # NOTE(review): ``analysis`` is not a declared field on
    # NewsAnalysisState; it is attached dynamically here — consider
    # declaring it on the dataclass.
    state.analysis = news_api.analyze_url_mock(url)

    return state
|
||||
|
||||
|
||||
def extract_keywords(state: NewsAnalysisState) -> NewsAnalysisState:
    """
    Extract keywords from the user query and store them on the state.

    Fix: the client exposes ``extract_keywords_mock``; the previous call to
    a nonexistent ``news_api.extract_keywords`` raised AttributeError.
    """
    state.current_phase = "executing"

    state.extracted_keywords = news_api.extract_keywords_mock(state.user_query)

    return state
|
||||
|
||||
|
||||
def generate_report(state: NewsAnalysisState) -> NewsAnalysisState:
    """
    Generate an analysis report for the user query.

    Fix: the client exposes ``generate_report_mock``; the previous call to
    a nonexistent ``news_api.generate_report`` raised AttributeError.
    """
    state.current_phase = "executing"

    state.report_content = news_api.generate_report_mock(state.user_query)

    return state
|
||||
|
||||
|
||||
def _news_field(item: Any, key: str, default: Any = "") -> Any:
    """Read *key* from a NewsItem-style object or a plain dict interchangeably."""
    if isinstance(item, dict):
        return item.get(key, default)
    return getattr(item, key, default)


def format_result(state: NewsAnalysisState) -> NewsAnalysisState:
    """
    Render the executed action's output into ``state.final_result``.

    Fixes over the previous version, each of which raised at runtime with
    the data the other nodes actually produce:
    - news items may be NewsItem objects or raw dicts (attribute access only);
    - ``source`` is a plain string in NewsItem (``.source.value`` crashed);
    - ``published_at`` may be an ISO string (``.strftime`` crashed);
    - ``extracted_keywords`` is declared ``List[str]`` (``.items()`` crashed);
    - the URL analysis is a dict, which broke the final ``"\\n".join``.
    """
    state.current_phase = "formatting"

    md = MarkdownFormatter()
    output_lines = []

    output_lines.append("┌───────────────────────────────────┐")
    output_lines.append("│ 📰 资讯助手 │")
    output_lines.append("└───────────────────────────────────┘")
    output_lines.append("")

    if state.action == NewsAction.QUERY_NEWS and state.news_items:
        output_lines.append(md.heading("📰 最新资讯", 2))
        output_lines.append("")

        for item in state.news_items:
            title = _news_field(item, "title")
            output_lines.append(md.heading(title, 3))

            # source may be a plain string or an enum-like object with .value
            source = _news_field(item, "source")
            source = getattr(source, "value", source)
            output_lines.append(f"> 来源: {source}")

            # published_at may be a datetime or an ISO-formatted string
            published_at = _news_field(item, "published_at")
            if isinstance(published_at, datetime):
                published_at = published_at.strftime('%Y-%m-%d %H:%M')
            output_lines.append(f"> 时间: {published_at}")

            summary = _news_field(item, "summary")
            if summary:
                output_lines.append("")
                output_lines.append(summary)
            url = _news_field(item, "url")
            if url:
                output_lines.append(f"🔗 链接: {md.link(title, url)}")
            output_lines.append("")

    elif state.action == NewsAction.EXTRACT_KEYWORDS and state.extracted_keywords:
        output_lines.append(md.heading("🏷️ 提取的关键词", 2))
        output_lines.append("")
        if isinstance(state.extracted_keywords, dict):
            # weighted form: {keyword: weight}
            keywords_data = [
                {"关键词": k, "权重": f"{w:.2f}"}
                for k, w in state.extracted_keywords.items()
            ]
        else:
            # declared List[str] on the state: no weights available
            keywords_data = [{"关键词": k, "权重": "-"} for k in state.extracted_keywords]
        output_lines.append(md.table(keywords_data))

    elif state.action == NewsAction.GENERATE_REPORT and state.report_content:
        output_lines.append(md.heading("📊 分析报告", 2))
        output_lines.append("")
        output_lines.append(state.report_content)

    elif state.action == NewsAction.ANALYZE_URL and getattr(state, "analysis", None):
        output_lines.append(md.heading("🔍 URL 分析", 2))
        output_lines.append("")
        if state.custom_urls:
            output_lines.append(f"> URL: {state.custom_urls[0]}")
            output_lines.append("")
        analysis = state.analysis
        if isinstance(analysis, dict):
            # render the analysis dict as a bullet list (a raw dict broke join)
            for key, value in analysis.items():
                output_lines.append(f"- {key}: {value}")
        else:
            output_lines.append(str(analysis))

    else:
        output_lines.append(md.heading("✨ 操作完成", 2))
        output_lines.append("您的请求已处理。")

    # Footer hint
    output_lines.append("")
    output_lines.append("---")
    output_lines.append("💡 提示:您可以继续查询资讯、提取关键词或者生成报告!")

    state.final_result = "\n".join(output_lines)
    state.success = True
    state.current_phase = "completed"

    return state
|
||||
|
||||
|
||||
def should_continue(state: NewsAnalysisState) -> str:
    """
    Conditional router: choose the next node from ``state.action``.

    Fix: the previous version compared against ``NewsAction.QUERY``, which
    does not exist (the enum member is ``QUERY_NEWS``), so every invocation
    raised AttributeError. Unknown/NONE actions skip straight to formatting.
    """
    routes = {
        NewsAction.QUERY_NEWS: "query_news",
        NewsAction.ANALYZE_URL: "analyze_url",
        NewsAction.EXTRACT_KEYWORDS: "extract_keywords",
        NewsAction.GENERATE_REPORT: "generate_report",
    }
    return routes.get(state.action, "format_result")
|
||||
89
backend/app/subgraphs/news_analysis/state.py
Normal file
89
backend/app/subgraphs/news_analysis/state.py
Normal file
@@ -0,0 +1,89 @@
|
||||
"""
|
||||
资讯子图状态定义
|
||||
News Analysis Subgraph State Definition
|
||||
"""
|
||||
|
||||
from enum import Enum, auto
|
||||
from typing import Optional, Dict, List, Any
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
class NewsAction(Enum):
    """Kinds of news operations the subgraph can perform."""
    NONE = auto()
    QUERY_NEWS = auto()  # query news items
    ANALYZE_URL = auto()  # analyze a news URL
    GENERATE_REPORT = auto()  # generate an analysis report
    FETCH_FROM_SOURCES = auto()  # fetch from configured sources
    EXTRACT_KEYWORDS = auto()  # extract keywords from text
|
||||
|
||||
|
||||
@dataclass
class NewsItem:
    """A single news item."""
    title: str = ""
    url: str = ""
    # Plain source name. NOTE(review): format_result reads .source.value as
    # if this were an enum — confirm the intended type.
    source: str = ""
    content: str = ""
    author: str = ""
    # Publication timestamp; presumably an ISO-8601 string — confirm.
    published_at: Optional[str] = None
    summary: str = ""
    keywords: List[str] = field(default_factory=list)
    sentiment: float = 0.0  # sentiment-analysis score
    metadata: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class NewsSource:
    """A configured news source."""
    name: str = ""
    url: str = ""
    type: str = ""  # rss, website, api
    enabled: bool = True
    # Timestamp of the last successful fetch, if any.
    last_fetched_at: Optional[str] = None
    metadata: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class NewsAnalysisState:
    """
    State carried through the news-analysis subgraph.

    Fix: the analyze_url node stores a URL-analysis dict on the state and
    format_result reads it back, but no such field was declared — it is now
    part of the schema (``analysis``), backward-compatibly defaulting to None.
    """
    # ========== Input ==========
    user_query: str = ""  # raw user query
    user_id: str = ""  # user id

    # Action control
    action: NewsAction = NewsAction.NONE
    action_params: Dict[str, Any] = field(default_factory=dict)

    # Source configuration
    use_follow_list: bool = False
    custom_urls: List[str] = field(default_factory=list)

    # ========== Execution ==========
    current_phase: str = "init"  # init, fetching, analyzing, done
    current_source_index: int = 0
    primary_fetched: bool = False

    # Configured sources
    sources: List[NewsSource] = field(default_factory=list)

    # Fetched news items
    news_items: List[NewsItem] = field(default_factory=list)

    # Extracted keywords
    extracted_keywords: List[str] = field(default_factory=list)

    # URL-analysis result produced by the analyze_url node
    analysis: Optional[Dict[str, Any]] = None

    # Generated report
    report_content: str = ""

    # ========== Result ==========
    success: bool = False
    error_message: str = ""
    final_result: str = ""
    result_data: Dict[str, Any] = field(default_factory=dict)

    # ========== Metadata ==========
    start_time: Optional[str] = None
    end_time: Optional[str] = None
    duration: float = 0.0
    debug_info: Dict[str, Any] = field(default_factory=dict)
|
||||
Reference in New Issue
Block a user