|
|
|
@ -6,19 +6,22 @@ import requests
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
from fastapi import HTTPException
|
|
|
|
|
from WxMini.Milvus.Config.MulvusConfig import *
|
|
|
|
|
|
|
|
|
|
# Configure logging via logging.basicConfig: INFO level, with each record
# formatted as "<asctime> - <levelname> - <message>".
|
|
|
|
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 分词与关键词提取
|
|
|
|
|
def extract_keywords(user_input):
    """Segment ``user_input`` with jieba and drop stop words.

    Args:
        user_input: Text to segment; passed unchanged to ``jieba.lcut``.

    Returns:
        list: The tokens returned by ``jieba.lcut`` in their original order,
        excluding every token that is a member of the stop-word set.
    """
    # Tokens to discard: particles, question words, "today" variants and
    # punctuation. Only one definition is kept (the earlier duplicate list was
    # overwritten before use), stored as a frozenset for O(1) membership tests.
    stop_words = frozenset((
        '的', '了', '吗', '呢', '是', '在', '啊', '呀', '怎么', '怎么样',
        '今天', '今日', '?', ',', '。',
    ))
    return [word for word in jieba.lcut(user_input) if word not in stop_words]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 搜索新浪新闻
|
|
|
|
|
def search_sina_news(keyword='新闻', time='d', page=1):
|
|
|
|
|
results = []
|
|
|
|
@ -49,6 +52,7 @@ def search_sina_news(keyword='新闻', time='d', page=1):
|
|
|
|
|
logger.error(f"请求新浪新闻时出错: {e}")
|
|
|
|
|
return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 根据关键词调用搜索函数
|
|
|
|
|
def search_based_on_keywords(keywords):
|
|
|
|
|
if not keywords:
|
|
|
|
@ -56,6 +60,7 @@ def search_based_on_keywords(keywords):
|
|
|
|
|
# 使用第一个关键词进行搜索
|
|
|
|
|
return search_sina_news(keyword=keywords[0], time='d', page=1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 使用AI整理搜索结果
|
|
|
|
|
async def format_results_with_ai(client, results):
|
|
|
|
|
if not results:
|
|
|
|
@ -78,6 +83,7 @@ async def format_results_with_ai(client, results):
|
|
|
|
|
logger.error("大模型调用超时")
|
|
|
|
|
raise HTTPException(status_code=500, detail="大模型调用超时")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 主函数
|
|
|
|
|
async def get_news(client, user_input):
|
|
|
|
|
keywords = extract_keywords(user_input)
|
|
|
|
@ -87,4 +93,3 @@ async def get_news(client, user_input):
|
|
|
|
|
return formatted_response
|
|
|
|
|
else:
|
|
|
|
|
return "未找到相关新闻。"
|
|
|
|
|
|
|
|
|
|