|
|
|
@ -0,0 +1,90 @@
|
|
|
|
|
import asyncio
|
|
|
|
|
import logging
|
|
|
|
|
|
|
|
|
|
import jieba
|
|
|
|
|
import requests
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
from fastapi import HTTPException
|
|
|
|
|
from WxMini.Milvus.Config.MulvusConfig import *
|
|
|
|
|
# Configure logging: root logger at INFO level, lines formatted as
# "<timestamp> - <level name> - <message>".
|
|
|
|
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
|
|
|
|
# Module-level logger named after this module; used by the functions below.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
# Word segmentation and keyword extraction
|
|
|
|
|
def extract_keywords(user_input):
    """Segment *user_input* with jieba and drop tokens that carry no meaning.

    Args:
        user_input: Raw text entered by the user. ``None`` or an empty
            string is accepted and yields an empty list.

    Returns:
        list: The segmented tokens in their original order, excluding stop
        words and whitespace-only tokens.
    """
    # Nothing to segment; also avoids handing None to jieba.
    if not user_input:
        return []

    # Particles, question words and punctuation that should never be used
    # as a search keyword. A frozenset gives O(1) membership tests.
    stop_words = frozenset(
        ['的', '了', '吗', '呢', '是', '在', '啊', '呀', '怎么', '怎么样', '?', ',', '。']
    )

    # jieba.lcut can emit whitespace runs as tokens of their own; callers
    # search with the first keyword, so a leading space must not survive.
    return [
        word
        for word in jieba.lcut(user_input)
        if word.strip() and word not in stop_words
    ]
|
|
|
|
|
|
|
|
|
|
# Search Sina News
|
|
|
|
|
def search_sina_news(keyword='新闻', time='d', page=1):
    """Query the Sina News search page and parse the result entries.

    Args:
        keyword: Search term, sent as the ``q`` query parameter.
        time: Value of the ``time`` query parameter.
            NOTE(review): ``'d'`` presumably limits results to the last
            day -- confirm against the Sina search page.
        page: Result page number, sent as the ``page`` query parameter.

    Returns:
        list[dict]: One dict per parsed result with the keys ``title``,
        ``link``, ``content``, ``source`` and ``time``. The list is empty
        when the request fails, times out, returns a non-200 status, or no
        result block can be parsed. Request and parsing errors are logged,
        never raised.
    """
    results = []
    # Let requests build the query string so that characters such as
    # '&', '#' or '=' inside the keyword are percent-encoded instead of
    # corrupting the URL.
    query = {
        'c': 'news',
        'adv': 1,
        'q': keyword,
        'time': time,
        'size': 20,
        'page': page,
    }
    try:
        response = requests.get(
            'https://search.sina.com.cn/news', params=query, timeout=10
        )
        response.encoding = 'utf-8'
        if response.status_code != 200:
            logger.warning("请求新浪新闻返回非 200 状态码: %s", response.status_code)
            return results

        soup = BeautifulSoup(response.text, 'html.parser')
        for block in soup.find_all('div', class_='box-result clearfix'):
            try:
                anchor = block.find('a')
                title = anchor.text
                link = anchor['href']
                infos = block.find('div', class_='r-info')
                content = infos.find('p', class_='content').text
                # The span text is "<source> <date> [<clock time>]".
                parts = infos.find('span').text.split()
                source = parts[0]
                # Use a separate local name so the ``time`` parameter is
                # not overwritten while iterating.
                published = ' '.join(parts[1:3])
            except (AttributeError, IndexError, KeyError, TypeError) as e:
                # A malformed block is skipped; the remaining blocks are
                # still parsed instead of aborting the whole page.
                logger.error("解析新闻块时出错: %s", e)
                continue
            results.append({
                'title': title,
                'link': link,
                'content': content,
                'source': source,
                'time': published,
            })
    except requests.Timeout:
        logger.error("请求新浪新闻超时")
    except Exception as e:
        # Best-effort boundary: any other failure is logged and whatever
        # was collected so far is returned.
        logger.error("请求新浪新闻时出错: %s", e)
    return results
|
|
|
|
|
|
|
|
|
|
# Run the search function for the extracted keywords
|
|
|
|
|
def search_based_on_keywords(keywords):
    """Search Sina News using the leading keyword.

    Args:
        keywords: Sequence of keywords; only the element at index 0 is used.

    Returns:
        The result of ``search_sina_news`` called with ``time='d'`` and
        ``page=1`` for that keyword, or an empty list when *keywords* is
        falsy (no search is issued in that case).
    """
    if keywords:
        # Only the first keyword drives the search.
        primary = keywords[0]
        return search_sina_news(keyword=primary, time='d', page=1)
    return []
|
|
|
|
|
|
|
|
|
|
# Use the AI model to tidy up the search results
|
|
|
|
|
async def format_results_with_ai(client, results):
    """Ask the chat model to rewrite the search results as one paragraph.

    Args:
        client: Object exposing ``chat.completions.create(...)`` whose result
            can be awaited (presumably an async OpenAI-compatible client --
            confirm against the caller).
        results: List of dicts that each provide ``'title'`` and ``'time'``.

    Returns:
        str: The stripped text of the first completion choice, or a fixed
        "no news found" message when *results* is falsy.

    Raises:
        HTTPException: Status 500 when the model call does not finish
            within 60 seconds.
    """
    if not results:
        return "未找到相关新闻。"

    # One "title (time)" line per result.
    headline_lines = []
    for item in results:
        headline_lines.append(f"{item['title']} ({item['time']})")
    search_text = "\n".join(headline_lines)

    chat_messages = [
        {"role": "system", "content": "你是一个助手,负责将搜索结果整理成用户友好的格式。"},
        {"role": "user", "content": f"请将以下搜索结果整理成一段话:\n{search_text}"},
    ]

    try:
        pending = client.chat.completions.create(
            model=MODEL_NAME,
            messages=chat_messages,
            max_tokens=4000,
        )
        # Cap the model call at 60 seconds.
        response = await asyncio.wait_for(pending, timeout=60)
        reply = response.choices[0].message.content
        return reply.strip()
    except asyncio.TimeoutError:
        logger.error("大模型调用超时")
        raise HTTPException(status_code=500, detail="大模型调用超时")
|
|
|
|
|
|
|
|
|
|
# Main entry point
|
|
|
|
|
async def get_news(client, user_input):
    """Extract keywords from the user's text, search news and summarize them.

    Args:
        client: Chat client forwarded unchanged to ``format_results_with_ai``.
        user_input: Raw text entered by the user.

    Returns:
        str: The model-formatted summary of the search results, or a fixed
        "no news found" message when the search returns nothing.
    """
    keywords = extract_keywords(user_input)

    # The search performs a synchronous HTTP request (10 s timeout). Calling
    # it directly inside this coroutine would freeze the event loop for the
    # whole request, so it is run in the default thread-pool executor.
    loop = asyncio.get_running_loop()
    results = await loop.run_in_executor(None, search_based_on_keywords, keywords)

    if not results:
        return "未找到相关新闻。"
    return await format_results_with_ai(client, results)
|
|
|
|
|
|