From fc8125f3f0785f738c15bf87e3649153b3eb2978 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Mon, 31 Mar 2025 13:57:33 +0800 Subject: [PATCH] 'commit' --- AI/WxMini/Start.py | 12 ++- AI/WxMini/Test/CallAi.py | 2 +- AI/WxMini/Test/NEWS2.py | 11 +++ AI/WxMini/Test/NEWS_1.py | 90 ++++++++++++++++++ AI/WxMini/Utils/MySQLUtil.py | 2 +- AI/WxMini/Utils/NewsUtil.py | 90 ++++++++++++++++++ .../__pycache__/NewsUtil.cpython-310.pyc | Bin 0 -> 2761 bytes 7 files changed, 202 insertions(+), 5 deletions(-) create mode 100644 AI/WxMini/Test/NEWS2.py create mode 100644 AI/WxMini/Test/NEWS_1.py create mode 100644 AI/WxMini/Utils/NewsUtil.py create mode 100644 AI/WxMini/Utils/__pycache__/NewsUtil.cpython-310.pyc diff --git a/AI/WxMini/Start.py b/AI/WxMini/Start.py index 0f270910..9b5bcef5 100644 --- a/AI/WxMini/Start.py +++ b/AI/WxMini/Start.py @@ -1,8 +1,7 @@ -import asyncio +import base64 import base64 import datetime import json -import logging import time import uuid from contextlib import asynccontextmanager @@ -12,7 +11,7 @@ from typing import Optional from alibabacloud_sts20150401 import models as sts_20150401_models from alibabacloud_sts20150401.client import Client as Sts20150401Client from alibabacloud_tea_openapi.models import Config -from fastapi import Query, Depends, HTTPException, status, Form, FastAPI +from fastapi import Query, Depends, status, Form, FastAPI from fastapi.security import OAuth2PasswordBearer from jose import JWTError, jwt from openai import AsyncOpenAI @@ -26,6 +25,7 @@ from WxMini.Utils.ImageUtil import * from WxMini.Utils.MySQLUtil import init_mysql_pool, get_chat_log_by_session, get_user_by_login_name, \ get_chat_logs_by_risk_flag, get_chat_logs_summary, save_chat_to_mysql from WxMini.Utils.MySQLUtil import update_risk, get_last_chat_log_id +from WxMini.Utils.NewsUtil import * from WxMini.Utils.OssUtil import upload_mp3_to_oss_from_memory, hmacsha256 from WxMini.Utils.TianQiUtil import get_weather from WxMini.Utils.TtsUtil import TTS @@ -319,6 +319,12 @@ async def reply(person_id: str = Form(...), logger.info(f"历史交互提示词: {history_prompt}") + # NBA与CBA + result = await get_news(client, prompt) + if result is not None: + history_prompt += result + print("新闻返回了下面的内容:" + result) + # 调用大模型,将历史交互作为提示词 try: response = await asyncio.wait_for( diff --git a/AI/WxMini/Test/CallAi.py b/AI/WxMini/Test/CallAi.py index f14e88d8..0f4f14eb 100644 --- a/AI/WxMini/Test/CallAi.py +++ b/AI/WxMini/Test/CallAi.py @@ -1,5 +1,5 @@ from openai import OpenAI -from TtsConfig import * +from WxMini.Milvus.Config.MulvusConfig import * # 初始化 OpenAI 客户端 diff --git a/AI/WxMini/Test/NEWS2.py b/AI/WxMini/Test/NEWS2.py new file mode 100644 index 00000000..37b0ea4c --- /dev/null +++ b/AI/WxMini/Test/NEWS2.py @@ -0,0 +1,11 @@ +from WxMini.Utils.NewsUtil import * + + +# 示例调用 +if __name__ == '__main__': + user_input = "最近CBA的比赛结果怎么样?" + result=get_news(user_input) + if result is not None: + print(result) + else: + print("No results found.") diff --git a/AI/WxMini/Test/NEWS_1.py b/AI/WxMini/Test/NEWS_1.py new file mode 100644 index 00000000..b7a0d6de --- /dev/null +++ b/AI/WxMini/Test/NEWS_1.py @@ -0,0 +1,90 @@ +import requests +from bs4 import BeautifulSoup + +def search_sina_news(keyword='新闻', time='d', page=1): + """ + 搜索新浪新闻 + :param keyword: 搜索关键字,默认为'新闻' + :param time: 时间范围,默认为'd'(一天内) + :param page: 页码,默认为1(第一页) + :return: 返回抓取到的搜索结果列表 + """ + results = [] # 存放抓取到搜索结果的列表 + order = 0 # 抓取到的搜索结果序号 + + url = f'https://search.sina.com.cn/news?c=news&adv=1&q={keyword}&time={time}&size=20&page={str(page)}' + """ + 参数说明 + c=news news-新闻 + adv=1 1-高级搜索模式; 0-普通搜索模式 + q 关键字 + time 时间范围:h-一个小时内;d-一天内;w-一周内;m-一个月内;年份数字(如2023、2024)-表示限定指定的年份内 + size=20 每页显示的结果数量,范围10-20 + page 当前抓取的页号,1-第一页,2-第二页,依此类推 + """ + + # 发送get请求 + response = requests.get(url) + response.encoding = 'utf-8' + + # 检查请求是否成功 + if response.status_code == 200: + # 解析响应内容 + soup = BeautifulSoup(response.text, 'html.parser') + + if page == 1: # 只有第一页是才抓取 + try: + # 获取总的搜索结果信息 + news_number = soup.find('div', 'l_v2').text + print(news_number) + except Exception as e: + print(e) + + # 抓取当前页面中的搜索结果 + result_blocks = soup.find_all('div', class_='box-result clearfix') + print(f'第{page}页抓取到的搜索结果数量为{len(result_blocks)}') + + # 从result_blocks列表中提取有效的数据 + for block in result_blocks: + order += 1 + title = block.find('a').text # 获取标题 + link = block.find('a')['href'] # 获取链接 + # 获取包含内容摘要、来源、发布时间的信息块 + infos = block.find('div', class_='r-info') + # 获取内容 + content = infos.find('p', class_='content').text + # 获取来源和发布时间 + source_time = infos.find('span').text + st_list = source_time.split() + source = st_list[0] + if len(st_list) > 2: + time = st_list[1] + ' ' + st_list[2] # 时间格式为yyyy-mm-dd hh:mm:ss + else: + time = st_list[1] # 时间格式为XX小时前 + results.append({ + 'order': order, + 'title': title, + 'link': link, + 'content': content, + 'source': source, + 'time': time + }) + # 在屏幕上输出抓取到的信息 + print(order, ". ", title) + print(link) + print(content) + print(source, ' ', time) + print(" ") + + print(f'本次共抓取到的搜索结果共{len(results)}条') + return results + else: + print('status_code!=200, 不能解析内容') + return [] + +# 示例调用 +if __name__ == '__main__': + # 默认抓取第一页 + results = search_sina_news(keyword='NBA', time='d', page=1) + # 如果需要抓取其他页,可以传入page参数 + # results = search_sina_news(keyword='NBA', time='d', page=2) \ No newline at end of file diff --git a/AI/WxMini/Utils/MySQLUtil.py b/AI/WxMini/Utils/MySQLUtil.py index 7560e77a..2dcf13e9 100644 --- a/AI/WxMini/Utils/MySQLUtil.py +++ b/AI/WxMini/Utils/MySQLUtil.py @@ -90,7 +90,7 @@ async def get_chat_log_by_session(mysql_pool, person_id, page=1, page_size=10): records = await cur.fetchall() # 将查询结果反转,确保最新消息显示在最后 - if records: + if page==1 and records: records.reverse() # 将查询结果转换为字典列表 diff --git a/AI/WxMini/Utils/NewsUtil.py b/AI/WxMini/Utils/NewsUtil.py new file mode 100644 index 00000000..66e83a90 --- /dev/null +++ b/AI/WxMini/Utils/NewsUtil.py @@ -0,0 +1,90 @@ +import asyncio +import logging + +import jieba +import requests +from bs4 import BeautifulSoup +from fastapi import HTTPException +from WxMini.Milvus.Config.MulvusConfig import * +# 配置日志 +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + +# 分词与关键词提取 +def extract_keywords(user_input): + # 使用 jieba 进行分词 + words = jieba.lcut(user_input) + # 过滤掉无意义的词(如标点符号、停用词等) + stop_words = ['的', '了', '吗', '呢', '是', '在', '啊', '呀', '怎么', '怎么样', '?', ',', '。'] + keywords = [word for word in words if word not in stop_words] + return keywords + +# 搜索新浪新闻 +def search_sina_news(keyword='新闻', time='d', page=1): + results = [] + url = f'https://search.sina.com.cn/news?c=news&adv=1&q={keyword}&time={time}&size=20&page={str(page)}' + try: + response = requests.get(url, timeout=10) # 设置请求超时 + response.encoding = 'utf-8' + if response.status_code == 200: + soup = BeautifulSoup(response.text, 'html.parser') + result_blocks = soup.find_all('div', class_='box-result clearfix') + for block in result_blocks: + try: + title = block.find('a').text + link = block.find('a')['href'] + infos = block.find('div', class_='r-info') + content = infos.find('p', class_='content').text + source_time = infos.find('span').text + st_list = source_time.split() + source = st_list[0] + time = st_list[1] + ' ' + st_list[2] if len(st_list) > 2 else st_list[1] + results.append({'title': title, 'link': link, 'content': content, 'source': source, 'time': time}) + except AttributeError as e: + logger.error(f"解析新闻块时出错: {e}") + continue + except requests.Timeout: + logger.error("请求新浪新闻超时") + except Exception as e: + logger.error(f"请求新浪新闻时出错: {e}") + return results + +# 根据关键词调用搜索函数 +def search_based_on_keywords(keywords): + if not keywords: + return [] + # 使用第一个关键词进行搜索 + return search_sina_news(keyword=keywords[0], time='d', page=1) + +# 使用AI整理搜索结果 +async def format_results_with_ai(client, results): + if not results: + return "未找到相关新闻。" + search_text = "\n".join([f"{result['title']} ({result['time']})" for result in results]) + try: + response = await asyncio.wait_for( + client.chat.completions.create( + model=MODEL_NAME, + messages=[ + {"role": "system", "content": "你是一个助手,负责将搜索结果整理成用户友好的格式。"}, + {"role": "user", "content": f"请将以下搜索结果整理成一段话:\n{search_text}"} + ], + max_tokens=4000 + ), + timeout=60 # 设置超时时间为 60 秒 + ) + return response.choices[0].message.content.strip() + except asyncio.TimeoutError: + logger.error("大模型调用超时") + raise HTTPException(status_code=500, detail="大模型调用超时") + +# 主函数 +async def get_news(client, user_input): + keywords = extract_keywords(user_input) + results = search_based_on_keywords(keywords) + if results: + formatted_response = await format_results_with_ai(client, results) + return formatted_response + else: + return "未找到相关新闻。" + diff --git a/AI/WxMini/Utils/__pycache__/NewsUtil.cpython-310.pyc b/AI/WxMini/Utils/__pycache__/NewsUtil.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ed2c1afe3a0f0a759adc9d2a0a0bdeeb089b00de GIT binary patch literal 2761 zcmaJ@-)|Jh9pBksx4vIC#v!ygV_HQjeJFXL3KHZ!f5W`SKI6VbYE@O@0rmTxvkA6P@3i0f z&g{&5zccgsd}h;1rKrL0;K6@3{|nwXSb6+sLgnZ1@D>=VQ4(wJpd>DVx*qFo!!-!D z8L`>6Tub?8Z2w9Nt*}5XYG2V^hZd-FNppwj7%kJvCE^xol@3EoiH^`+P?qT^-3?`h zk~3QEsSSvyHEKjwUJCt8L|5c$>k$8-7WZ?$Xg08xX!%KLoZ`-Krr0q&wvu26Y%g~ zfDzgX$+a7#N19qsrz9uK8XKcJH7;p6nb9tio}TMxG-|3h`(e#H-r(Ds-H=A(rRi<4 zbeJCZ_zXP%-V3Iw<=Wd$4`NUaqUbsKoqiTalNwsg z9G#>5&6F+8y&fgaSJI?8H+^dE&GWNS63x9KqL|M$!gCzGdgp>H9*ZL`aD$GIVRTGo zuhu;fo)^py1a1&U;Pqp18u&3k4$js9X`J{!3t=`)r4p`gWyhRHp31m{)iRi;(W!u` z>C$RN;k@sfagd3cA&VIgnHMFUOh`+mBAued{*@&Ja8b1pfjDj^2Qy*FFW%=LD=Fv|PUk-gUcY z3bLZ-CP2gj5Llj(TP>4XOFBCzEbwh=U(|bcZnp|Z1Uhs@?-laGjP~~Fo|8K(4lUjw zExR|g<t3yD)!{Y!ZXY89XjjE27DyaeNh)woJtSsA*a^{RQ5j=;!aQOv6s z3Ej11vL>{Rj%#%H(;9fY#$n47W6%aLGgeCZP+rW-`7l6k?3#6h5bf-9xq&2XbSJm? z#Y3M3i*+6)J`$@QB$Ek%>BV3QiwU1DPaT;!JJq!(MAQzay449Db;GIW4^MRbCe*D= zEFAjD#@Dcu-O{3H0|b2r=PL~wEq8a%r{@o`kY};j7sL>1Av%v#4`QElPZD36i!5B| z+U!u2ETocjWFbfs5hg;KyyGVf4{!q^%=UnJK=#!vX^BY0p)}(tSzXc22@WMIDl?F64P_f8U67XEGFqkp=Hg{_O-F_z!>{XDYwC)9An?HA;<(l-v>aGf zsUb;mTQmd+MnfrA+pDC!I*{^l)D0AMU3tcJb5qt$b-rDF-?M5z9*~+^e@$noKGGgQ zV#l5+<@_1O!Jr4U?!kqWwSD1j!pl1siAB$ko+#ZmzraP<&Q8P7)vs3iAKzNN^Ums> zzpee|X8+fhzrFX{`?o*4fBP?MU%c19dTssopV#mHzW=9d{XgDbzxrPP{j2ML_-p_D zFV{Y}y!QE5>z`ceuiRU^_rW*sybJnmvNR3{L^DylvpR&hf9p$_YW42tt9LFxaYBgR z|NB4g-@5+oy-$jG3*bc!Nz2R0%AUEmw2y2zp$X+Vm2 zO({^TS!*W4hy#jX_U`B)IRUKL5|W+y7eLVP&VL#DbG&Ym*mx7dJ*YE80K~!D|1ow} z8Qz_20*o}~`3p+E5wN4ti(w-bl7(Lp;IVUojp#W GT=^dnSIk=g literal 0 HcmV?d00001