# NOTE(review): the lines that stood here were repository-page UI text captured
# during scraping (topic naming rules, "96 lines / 3.8 KiB", a timestamp) —
# not part of the program. Commented out so the module parses.
import asyncio
import logging
import jieba
import requests
from bs4 import BeautifulSoup
from fastapi import HTTPException
from WxMini.Milvus.Config.MulvusConfig import *
# 配置日志
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
# 分词与关键词提取
# Tokenization and keyword extraction
def extract_keywords(user_input):
    """Tokenize *user_input* with jieba and drop stop words.

    Args:
        user_input: Raw user query string (Chinese text expected).

    Returns:
        list[str]: Tokens in their original order, with stop words and
        empty tokens removed.
    """
    # NOTE(review): the original stop list contained many duplicate empty
    # strings — they look like Chinese punctuation/stop words that were lost
    # to an encoding/scraping problem. Confirm the intended characters and
    # restore them here.
    # frozenset: O(1) membership per token and de-duplicates the entries
    # (the original list held the same '' entry many times).
    stop_words = frozenset({'', '怎么', '怎么样', '今天', '今日'})
    words = jieba.lcut(user_input)
    return [word for word in words if word not in stop_words]
# 搜索新浪新闻
# Search Sina news
def search_sina_news(keyword='新闻', time='d', page=1):
    """Search Sina news and scrape the result listing page.

    Args:
        keyword: Query term (default: '新闻').
        time: Time-range filter accepted by Sina search ('d' = last day).
        page: 1-based results page number.

    Returns:
        list[dict]: One dict per result with keys 'title', 'link',
        'content', 'source', 'time'. Empty list on any request failure.
    """
    results = []
    url = f'https://search.sina.com.cn/news?c=news&adv=1&q={keyword}&time={time}&size=20&page={str(page)}'
    try:
        response = requests.get(url, timeout=10)  # bound the request time
        response.encoding = 'utf-8'
        if response.status_code != 200:
            # The original fell through silently on non-200; log it so
            # failures are visible to operators.
            logger.error(f"请求新浪新闻时出错: HTTP {response.status_code}")
            return results
        soup = BeautifulSoup(response.text, 'html.parser')
        for block in soup.find_all('div', class_='box-result clearfix'):
            try:
                anchor = block.find('a')
                title = anchor.text
                link = anchor['href']
                infos = block.find('div', class_='r-info')
                content = infos.find('p', class_='content').text
                st_list = infos.find('span').text.split()
                source = st_list[0]
                # BUGFIX: the original assigned this to `time`, clobbering
                # the function parameter; use a dedicated local instead.
                published = st_list[1] + ' ' + st_list[2] if len(st_list) > 2 else st_list[1]
                results.append({'title': title, 'link': link, 'content': content,
                                'source': source, 'time': published})
            except (AttributeError, IndexError) as e:
                # IndexError (short source/time span) was previously uncaught
                # and aborted the whole scrape; skip just the bad block.
                logger.error(f"解析新闻块时出错: {e}")
                continue
    except requests.Timeout:
        logger.error("请求新浪新闻超时")
    except Exception as e:
        logger.error(f"请求新浪新闻时出错: {e}")
    return results
# 根据关键词调用搜索函数
# Dispatch a search from the extracted keywords
def search_based_on_keywords(keywords):
    """Run a one-day Sina news search driven by the first keyword.

    Args:
        keywords: Ordered keyword list from :func:`extract_keywords`.

    Returns:
        list: Search results, or an empty list when no keywords exist.
    """
    if keywords:
        # Only the highest-ranked (first) keyword drives the query.
        return search_sina_news(keyword=keywords[0], time='d', page=1)
    return []
# 使用AI整理搜索结果
# Use the AI model to reformat the search results
async def format_results_with_ai(client, results):
    """Summarize scraped news results into one user-friendly paragraph.

    Args:
        client: Async chat-completions client (OpenAI-compatible interface).
        results: Result dicts from :func:`search_sina_news`.

    Returns:
        str: The model's summary, or a fixed "not found" message when
        *results* is empty.

    Raises:
        HTTPException: 500 when the model call exceeds the 60s deadline.
    """
    if not results:
        return "未找到相关新闻。"
    headlines = [f"{item['title']} ({item['time']})" for item in results]
    search_text = "\n".join(headlines)
    prompt_messages = [
        {"role": "system", "content": "你是一个助手,负责将搜索结果整理成用户友好的格式。"},
        {"role": "user", "content": f"请将以下搜索结果整理成一段话:\n{search_text}"},
    ]
    try:
        completion = await asyncio.wait_for(
            client.chat.completions.create(
                model=MODEL_NAME,
                messages=prompt_messages,
                max_tokens=4000,
            ),
            timeout=60,  # hard 60-second deadline on the model call
        )
    except asyncio.TimeoutError:
        logger.error("大模型调用超时")
        raise HTTPException(status_code=500, detail="大模型调用超时")
    return completion.choices[0].message.content.strip()
# 主函数
# Entry point: keyword extraction -> news search -> AI summary
async def get_news(client, user_input):
    """End-to-end news pipeline for a single user query.

    Args:
        client: Async chat-completions client passed to the summarizer.
        user_input: Raw user query string.

    Returns:
        str: AI-formatted news summary, or a fixed "not found" message
        when the search yields nothing.
    """
    found = search_based_on_keywords(extract_keywords(user_input))
    if not found:
        return "未找到相关新闻。"
    return await format_results_with_ai(client, found)