From e1d2472ebaa0146e375eb7bf84d2ddb9c5715ce4 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Mon, 23 Jun 2025 20:28:58 +0800 Subject: [PATCH] 'commit' --- dsRag/T9_RAG.py | 121 +++++++++++++++++++++++++++--------------------- 1 file changed, 68 insertions(+), 53 deletions(-) diff --git a/dsRag/T9_RAG.py b/dsRag/T9_RAG.py index 3e219b76..1d14c448 100644 --- a/dsRag/T9_RAG.py +++ b/dsRag/T9_RAG.py @@ -3,50 +3,94 @@ pip install openai ''' from elasticsearch import Elasticsearch from Util.EmbeddingUtil import text_to_embedding -import Config.Config as config from openai import OpenAI -import json - +from Config import Config # 初始化ES连接 es = Elasticsearch( - hosts=config.ES_CONFIG['hosts'], - basic_auth=config.ES_CONFIG['basic_auth'], - verify_certs=config.ES_CONFIG['verify_certs'] + hosts=Config.ES_CONFIG['hosts'], + basic_auth=Config.ES_CONFIG['basic_auth'], + verify_certs=Config.ES_CONFIG['verify_certs'] ) # 初始化DeepSeek客户端 -client = OpenAI(api_key=config.DEEPSEEK_API_KEY) +client = OpenAI( + api_key=Config.DEEPSEEK_API_KEY, + base_url=Config.DEEPSEEK_URL +) + +def generate_report(query, context): + """使用DeepSeek生成报告""" + prompt = f"""根据以下关于'{query}'的相关信息,整理一份结构化的报告: +要求: +1. 分章节组织内容 +2. 包含关键数据和事实 +3. 语言简洁专业 + +相关信息: +{context}""" + + try: + response = client.chat.completions.create( + model="deepseek-chat", + messages=[ + {"role": "system", "content": "你是一个专业的文档整理助手"}, + {"role": "user", "content": prompt} + ], + temperature=0.3, + stream=True + ) + + # 流式输出处理 + full_response = "" + for chunk in response: + if chunk.choices[0].delta.content: + content = chunk.choices[0].delta.content + print(content, end="", flush=True) + full_response += content + + return full_response + except Exception as e: + print(f"生成报告时出错: {str(e)}") + return "" + +def process_query(query): + """处理用户查询并生成报告""" + print(f"正在搜索与'{query}'相关的数据...") + context = search_related_data(query) + print(f"找到{len(context.split(chr(10)+chr(10)))}条相关数据") + + print("正在生成报告...") + report = generate_report(query, context) + + return report def search_related_data(query): - """搜索向量数据和原始相关数据""" + """搜索与查询相关的数据""" # 向量搜索 - vector = text_to_embedding(query) + query_vector = text_to_embedding(query) vector_results = es.search( - index='knowledge_base', + index=Config.ES_CONFIG['default_index'], body={ - "size": 5, "query": { "script_score": { "query": {"match_all": {}}, "script": { "source": "cosineSimilarity(params.query_vector, 'vector') + 1.0", - "params": {"query_vector": vector} + "params": {"query_vector": query_vector} } } }, - "_source": ["text"] + "size": 5 } ) # 文本精确搜索 text_results = es.search( - index='raw_texts', + index="raw_texts", body={ "query": { - "multi_match": { - "query": query, - "fields": ["text"], - "type": "best_fields" + "match": { + "text.keyword": query } }, "size": 5 @@ -54,43 +98,14 @@ def search_related_data(query): ) # 合并结果 - all_results = [hit['_source']['text'] for hit in vector_results['hits']['hits']] - all_results.extend([hit['_source']['text'] for hit in text_results['hits']['hits']]) + context = "" + for hit in vector_results['hits']['hits']: + context += f"向量相似度结果(score={hit['_score']}):\n{hit['_source']['text']}\n\n" - return "\n\n".join(all_results) - -def generate_report(query, context): - """使用DeepSeek生成报告""" - prompt = f"""根据以下关于'{query}'的相关信息,整理一份结构化的报告: -要求: -1. 分章节组织内容 -2. 包含关键数据和事实 -3. 语言简洁专业 - -相关信息: -{context}""" + for hit in text_results['hits']['hits']: + context += f"文本精确匹配结果(score={hit['_score']}):\n{hit['_source']['text']}\n\n" - response = client.chat.completions.create( - model="deepseek-chat", - messages=[ - {"role": "system", "content": "你是一个专业的文档整理助手"}, - {"role": "user", "content": prompt} - ], - temperature=0.3 - ) - - return response.choices[0].message.content - -def process_query(query): - """处理用户查询并生成报告""" - print(f"正在搜索与'{query}'相关的数据...") - context = search_related_data(query) - print(f"找到{len(context.split(chr(10)+chr(10)))}条相关数据") - - print("正在生成报告...") - report = generate_report(query, context) - - return report + return context if __name__ == "__main__": #user_query = input("请输入您的查询要求:")