import asyncio
import logging
from typing import Dict, List

import jieba
import requests
from bs4 import BeautifulSoup
from fastapi import HTTPException

# NOTE(review): MODEL_NAME used below is presumably provided by this wildcard
# import -- confirm, and prefer an explicit import once the module's exports
# are known.
from WxMini.Milvus.Config.MulvusConfig import *  # noqa: F401,F403

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Tokens that carry no search meaning (particles, filler words, punctuation).
# Kept as a frozenset so the membership test is O(1) and the collection is
# built once instead of on every call.
_STOP_WORDS = frozenset([
    '的', '了', '吗', '呢', '是', '在', '啊', '呀',
    '怎么', '怎么样', '今天', '今日', '?', ',', '。',
])

_SINA_SEARCH_URL = 'https://search.sina.com.cn/news'


# Tokenization and keyword extraction
def extract_keywords(user_input: str) -> List[str]:
    """Split *user_input* with jieba and drop stop words and blank tokens.

    Args:
        user_input: Raw text typed by the user. ``None`` or an empty string
            yields an empty list.

    Returns:
        The remaining tokens in their original order.
    """
    if not user_input:
        return []
    # jieba emits whitespace runs as tokens; they must never become the
    # search keyword, so filter them together with the stop words.
    return [
        word
        for word in jieba.lcut(user_input)
        if word.strip() and word not in _STOP_WORDS
    ]


def _parse_result_block(block) -> Dict[str, str]:
    """Turn one ``box-result`` element into a result dict.

    Raises AttributeError / KeyError / TypeError when an expected tag or
    attribute is missing; the caller skips such blocks.
    """
    anchor = block.find('a')
    info = block.find('div', class_='r-info')
    # The span text is split on whitespace: the first token is used as the
    # source, and up to two following tokens as the timestamp.
    parts = info.find('span').text.split()
    return {
        'title': anchor.text,
        'link': anchor['href'],
        'content': info.find('p', class_='content').text,
        'source': parts[0] if parts else '',
        # Joining the slice tolerates spans with fewer tokens than expected
        # instead of raising IndexError.
        'time': ' '.join(parts[1:3]),
    }


# Search Sina news
def search_sina_news(keyword='新闻', time='d', page=1) -> List[Dict[str, str]]:
    """Query the Sina news search page and scrape the result list.

    Args:
        keyword: Search term sent as the ``q`` query parameter.
        time: Value sent as the ``time`` query parameter.
        page: Result page number sent as the ``page`` query parameter.

    Returns:
        A list of dicts with the keys ``title``, ``link``, ``content``,
        ``source`` and ``time``. Request or parsing failures are logged and
        whatever was collected so far (possibly nothing) is returned; this
        function does not raise.
    """
    results: List[Dict[str, str]] = []
    # Pass the query through ``params`` so requests URL-encodes the keyword;
    # interpolating it into the URL breaks on characters such as '&' or '#'.
    params = {
        'c': 'news',
        'adv': 1,
        'q': keyword,
        'time': time,
        'size': 20,
        'page': page,
    }
    try:
        response = requests.get(_SINA_SEARCH_URL, params=params, timeout=10)  # request timeout
        response.encoding = 'utf-8'
        if response.status_code != 200:
            logger.warning("请求新浪新闻返回非 200 状态码: %s", response.status_code)
            return results

        soup = BeautifulSoup(response.text, 'html.parser')
        for block in soup.find_all('div', class_='box-result clearfix'):
            try:
                results.append(_parse_result_block(block))
            except (AttributeError, KeyError, TypeError) as e:
                # One malformed block must not discard the remaining ones.
                logger.error("解析新闻块时出错: %s", e)
    except requests.Timeout:
        logger.error("请求新浪新闻超时")
    except Exception as e:  # best-effort boundary: never propagate to callers
        logger.error("请求新浪新闻时出错: %s", e)
    return results


# Run the search for the extracted keywords
def search_based_on_keywords(keywords):
    """Search Sina news using only the first keyword.

    Returns an empty list when *keywords* is empty.
    """
    if not keywords:
        return []
    # Only the first keyword is used for the search
    return search_sina_news(keyword=keywords[0], time='d', page=1)


# Let the AI model tidy up the search results
async def format_results_with_ai(client, results):
    """Ask the chat model to summarize *results* into one paragraph.

    Args:
        client: Object exposing an awaitable ``chat.completions.create``
            (presumably an async OpenAI-compatible client -- confirm).
        results: Result dicts as produced by ``search_sina_news``; only the
            ``title`` and ``time`` keys are read.

    Returns:
        The stripped model reply, or a fixed "not found" message when
        *results* is empty.

    Raises:
        HTTPException: status 500 when the model call exceeds 60 seconds.
    """
    if not results:
        return "未找到相关新闻。"

    search_text = "\n".join(f"{result['title']} ({result['time']})" for result in results)
    try:
        response = await asyncio.wait_for(
            client.chat.completions.create(
                model=MODEL_NAME,
                messages=[
                    {"role": "system", "content": "你是一个助手,负责将搜索结果整理成用户友好的格式。"},
                    {"role": "user", "content": f"请将以下搜索结果整理成一段话:\n{search_text}"},
                ],
                max_tokens=4000,
            ),
            timeout=60,  # give up after 60 seconds
        )
    except asyncio.TimeoutError as exc:
        logger.error("大模型调用超时")
        raise HTTPException(status_code=500, detail="大模型调用超时") from exc
    return response.choices[0].message.content.strip()


# Main entry point
async def get_news(client, user_input):
    """Extract keywords from *user_input*, search news and summarize them.

    Args:
        client: Chat client forwarded to ``format_results_with_ai``.
        user_input: Raw user text.

    Returns:
        The model-formatted summary, or the fixed "not found" message when
        the search yields nothing.

    Raises:
        HTTPException: propagated from ``format_results_with_ai`` on timeout.
    """
    keywords = extract_keywords(user_input)
    # The search uses the blocking ``requests`` library; run it in the default
    # executor so the event loop is not stalled while waiting on the network.
    loop = asyncio.get_running_loop()
    results = await loop.run_in_executor(None, search_based_on_keywords, keywords)
    # format_results_with_ai already returns the "not found" message for an
    # empty result list, so no separate branch is needed here.
    return await format_results_with_ai(client, results)