import asyncio
import logging
import jieba
import requests
from bs4 import BeautifulSoup
from fastapi import HTTPException
from WxMini.Milvus.Config.MulvusConfig import *

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)


# Tokenization and keyword extraction
def extract_keywords(user_input):
    # Segment the input with jieba
    words = jieba.lcut(user_input)
    # Filter out meaningless tokens (punctuation, stop words, etc.)
    stop_words = ['', '', '', '', '', '', '', '', '怎么', '怎么样', '今天', '今日', '', '', '']
    keywords = [word for word in words if word not in stop_words]
    return keywords
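
# Example of the intended behavior (illustrative only; the actual tokens depend on
# jieba's segmentation and on the stop-word list above):
#   extract_keywords("今天有什么新闻") -> ['有', '什么', '新闻']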


# Search Sina news
def search_sina_news(keyword='新闻', time='d', page=1):
    results = []
    url = f'https://search.sina.com.cn/news?c=news&adv=1&q={keyword}&time={time}&size=20&page={str(page)}'
    try:
        response = requests.get(url, timeout=10)  # request timeout in seconds
        response.encoding = 'utf-8'
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            result_blocks = soup.find_all('div', class_='box-result clearfix')
            for block in result_blocks:
                try:
                    title = block.find('a').text
                    link = block.find('a')['href']
                    infos = block.find('div', class_='r-info')
                    content = infos.find('p', class_='content').text
                    source_time = infos.find('span').text
                    st_list = source_time.split()
                    source = st_list[0]
                    # The span text is "source date [time]"; join date and time when both are present
                    news_time = st_list[1] + ' ' + st_list[2] if len(st_list) > 2 else st_list[1]
                    results.append({'title': title, 'link': link, 'content': content,
                                    'source': source, 'time': news_time})
                except (AttributeError, IndexError) as e:
                    logger.error(f"解析新闻块时出错: {e}")
                    continue
    except requests.Timeout:
        logger.error("请求新浪新闻超时")
    except Exception as e:
        logger.error(f"请求新浪新闻时出错: {e}")
    return results


# Run a search based on the extracted keywords
def search_based_on_keywords(keywords):
    if not keywords:
        return []
    # Only the first keyword is used for the search
    return search_sina_news(keyword=keywords[0], time='d', page=1)


# Use the AI model to summarize the search results
async def format_results_with_ai(client, results):
    if not results:
        return "未找到相关新闻。"
    search_text = "\n".join([f"{result['title']} ({result['time']})" for result in results])
    try:
        response = await asyncio.wait_for(
            client.chat.completions.create(
                model=MODEL_NAME,
                messages=[
                    {"role": "system", "content": "你是一个助手,负责将搜索结果整理成用户友好的格式。"},
                    {"role": "user", "content": f"请将以下搜索结果整理成一段话:\n{search_text}"}
                ],
                max_tokens=4000
            ),
            timeout=60  # 60-second timeout for the model call
        )
        return response.choices[0].message.content.strip()
    except asyncio.TimeoutError:
        logger.error("大模型调用超时")
        raise HTTPException(status_code=500, detail="大模型调用超时")


# Main entry point: extract keywords, search, and summarize
async def get_news(client, user_input):
    keywords = extract_keywords(user_input)
    results = search_based_on_keywords(keywords)
    if results:
        formatted_response = await format_results_with_ai(client, results)
        return formatted_response
    else:
        return "未找到相关新闻。"