You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

206 lines
8.5 KiB

4 months ago
import random
import requests
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
from config.logger import setup_logging
from plugins_func.register import register_function, ToolType, ActionResponse, Action
TAG = __name__
logger = setup_logging()
GET_NEWS_FUNCTION_DESC = {
"type": "function",
"function": {
"name": "get_news",
"description": (
"获取最新新闻,随机选择一条新闻进行播报。"
"用户可以指定新闻类型,如社会新闻、科技新闻、国际新闻等。"
"如果没有指定,默认播报社会新闻。"
"用户可以要求获取详细内容,此时会获取新闻的详细内容。"
),
"parameters": {
"type": "object",
"properties": {
"category": {
"type": "string",
"description": "新闻类别,例如社会、科技、国际。可选参数,如果不提供则使用默认类别"
},
"detail": {
"type": "boolean",
"description": "是否获取详细内容默认为false。如果为true则获取上一条新闻的详细内容"
},
"lang": {
"type": "string",
"description": "返回用户使用的语言code例如zh_CN/zh_HK/en_US/ja_JP等默认zh_CN"
}
},
"required": ["lang"]
}
}
}
def fetch_news_from_rss(rss_url):
"""从RSS源获取新闻列表"""
try:
response = requests.get(rss_url)
response.raise_for_status()
# 解析XML
root = ET.fromstring(response.content)
# 查找所有item元素新闻条目
news_items = []
for item in root.findall('.//item'):
title = item.find('title').text if item.find('title') is not None else "无标题"
link = item.find('link').text if item.find('link') is not None else "#"
description = item.find('description').text if item.find('description') is not None else "无描述"
pubDate = item.find('pubDate').text if item.find('pubDate') is not None else "未知时间"
news_items.append({
'title': title,
'link': link,
'description': description,
'pubDate': pubDate
})
return news_items
except Exception as e:
logger.bind(tag=TAG).error(f"获取RSS新闻失败: {e}")
return []
def fetch_news_detail(url):
"""获取新闻详情页内容并总结"""
try:
response = requests.get(url)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')
# 尝试提取正文内容 (这里的选择器需要根据实际网站结构调整)
content_div = soup.select_one('.content_desc, .content, article, .article-content')
if content_div:
paragraphs = content_div.find_all('p')
content = '\n'.join([p.get_text().strip() for p in paragraphs if p.get_text().strip()])
return content
else:
# 如果找不到特定的内容区域,尝试获取所有段落
paragraphs = soup.find_all('p')
content = '\n'.join([p.get_text().strip() for p in paragraphs if p.get_text().strip()])
return content[:2000] # 限制长度
except Exception as e:
logger.bind(tag=TAG).error(f"获取新闻详情失败: {e}")
return "无法获取详细内容"
def map_category(category_text):
"""将用户输入的中文类别映射到配置文件中的类别键"""
if not category_text:
return None
# 类别映射字典,目前支持社会、国际、财经新闻,如需更多类型,参见配置文件
category_map = {
# 社会新闻
"社会": "society",
"社会新闻": "society",
# 国际新闻
"国际": "world",
"国际新闻": "world",
# 财经新闻
"财经": "finance",
"财经新闻": "finance",
"金融": "finance",
"经济": "finance"
}
# 转换为小写并去除空格
normalized_category = category_text.lower().strip()
# 返回映射结果,如果没有匹配项则返回原始输入
return category_map.get(normalized_category, category_text)
@register_function('get_news', GET_NEWS_FUNCTION_DESC, ToolType.SYSTEM_CTL)
def get_news(conn, category: str = None, detail: bool = False, lang: str = "zh_CN"):
"""获取新闻并随机选择一条进行播报,或获取上一条新闻的详细内容"""
try:
# 如果detail为True获取上一条新闻的详细内容
if detail:
if not hasattr(conn, 'last_news_link') or not conn.last_news_link or 'link' not in conn.last_news_link:
return ActionResponse(Action.REQLLM, "抱歉,没有找到最近查询的新闻,请先获取一条新闻。", None)
link = conn.last_news_link.get('link')
title = conn.last_news_link.get('title', '未知标题')
if link == '#':
return ActionResponse(Action.REQLLM, "抱歉,该新闻没有可用的链接获取详细内容。", None)
logger.bind(tag=TAG).debug(f"获取新闻详情: {title}, URL={link}")
# 获取新闻详情
detail_content = fetch_news_detail(link)
if not detail_content or detail_content == "无法获取详细内容":
return ActionResponse(Action.REQLLM,
f"抱歉,无法获取《{title}》的详细内容,可能是链接已失效或网站结构发生变化。", None)
# 构建详情报告
detail_report = (
f"根据下列数据,用{lang}回应用户的新闻详情查询请求:\n\n"
f"新闻标题: {title}\n"
f"详细内容: {detail_content}\n\n"
f"(请对上述新闻内容进行总结,提取关键信息,以自然、流畅的方式向用户播报,"
f"不要提及这是总结,就像是在讲述一个完整的新闻故事)"
)
return ActionResponse(Action.REQLLM, detail_report, None)
# 否则,获取新闻列表并随机选择一条
# 从配置中获取RSS URL
rss_config = conn.config["plugins"]["get_news"]
default_rss_url = rss_config.get("default_rss_url", "https://www.chinanews.com.cn/rss/society.xml")
# 将用户输入的类别映射到配置中的类别键
mapped_category = map_category(category)
# 如果提供了类别尝试从配置中获取对应的URL
rss_url = default_rss_url
if mapped_category and mapped_category in rss_config.get("category_urls", {}):
rss_url = rss_config["category_urls"][mapped_category]
logger.bind(tag=TAG).info(f"获取新闻: 原始类别={category}, 映射类别={mapped_category}, URL={rss_url}")
# 获取新闻列表
news_items = fetch_news_from_rss(rss_url)
if not news_items:
return ActionResponse(Action.REQLLM, "抱歉,未能获取到新闻信息,请稍后再试。", None)
# 随机选择一条新闻
selected_news = random.choice(news_items)
# 保存当前新闻链接到连接对象,以便后续查询详情
if not hasattr(conn, 'last_news_link'):
conn.last_news_link = {}
conn.last_news_link = {
'link': selected_news.get('link', '#'),
'title': selected_news.get('title', '未知标题')
}
# 构建新闻报告
news_report = (
f"根据下列数据,用{lang}回应用户的新闻查询请求:\n\n"
f"新闻标题: {selected_news['title']}\n"
f"发布时间: {selected_news['pubDate']}\n"
f"新闻内容: {selected_news['description']}\n"
f"(请以自然、流畅的方式向用户播报这条新闻,可以适当总结内容,"
f"直接读出新闻即可,不需要额外多余的内容。"
f"如果用户询问更多详情,告知用户可以说'请详细介绍这条新闻'获取更多内容)"
)
return ActionResponse(Action.REQLLM, news_report, None)
except Exception as e:
logger.bind(tag=TAG).error(f"获取新闻出错: {e}")
return ActionResponse(Action.REQLLM, "抱歉,获取新闻时发生错误,请稍后再试。", None)