|
|
|
@ -3,50 +3,94 @@ pip install openai
|
|
|
|
|
'''
|
|
|
|
|
from elasticsearch import Elasticsearch
|
|
|
|
|
from Util.EmbeddingUtil import text_to_embedding
|
|
|
|
|
import Config.Config as config
|
|
|
|
|
from openai import OpenAI
|
|
|
|
|
import json
|
|
|
|
|
|
|
|
|
|
from Config import Config
|
|
|
|
|
# 初始化ES连接
|
|
|
|
|
es = Elasticsearch(
|
|
|
|
|
hosts=config.ES_CONFIG['hosts'],
|
|
|
|
|
basic_auth=config.ES_CONFIG['basic_auth'],
|
|
|
|
|
verify_certs=config.ES_CONFIG['verify_certs']
|
|
|
|
|
hosts=Config.ES_CONFIG['hosts'],
|
|
|
|
|
basic_auth=Config.ES_CONFIG['basic_auth'],
|
|
|
|
|
verify_certs=Config.ES_CONFIG['verify_certs']
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# 初始化DeepSeek客户端
|
|
|
|
|
client = OpenAI(api_key=config.DEEPSEEK_API_KEY)
|
|
|
|
|
client = OpenAI(
|
|
|
|
|
api_key=Config.DEEPSEEK_API_KEY,
|
|
|
|
|
base_url=Config.DEEPSEEK_URL
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
def generate_report(query, context):
|
|
|
|
|
"""使用DeepSeek生成报告"""
|
|
|
|
|
prompt = f"""根据以下关于'{query}'的相关信息,整理一份结构化的报告:
|
|
|
|
|
要求:
|
|
|
|
|
1. 分章节组织内容
|
|
|
|
|
2. 包含关键数据和事实
|
|
|
|
|
3. 语言简洁专业
|
|
|
|
|
|
|
|
|
|
相关信息:
|
|
|
|
|
{context}"""
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
response = client.chat.completions.create(
|
|
|
|
|
model="deepseek-chat",
|
|
|
|
|
messages=[
|
|
|
|
|
{"role": "system", "content": "你是一个专业的文档整理助手"},
|
|
|
|
|
{"role": "user", "content": prompt}
|
|
|
|
|
],
|
|
|
|
|
temperature=0.3,
|
|
|
|
|
stream=True
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# 流式输出处理
|
|
|
|
|
full_response = ""
|
|
|
|
|
for chunk in response:
|
|
|
|
|
if chunk.choices[0].delta.content:
|
|
|
|
|
content = chunk.choices[0].delta.content
|
|
|
|
|
print(content, end="", flush=True)
|
|
|
|
|
full_response += content
|
|
|
|
|
|
|
|
|
|
return full_response
|
|
|
|
|
except Exception as e:
|
|
|
|
|
print(f"生成报告时出错: {str(e)}")
|
|
|
|
|
return ""
|
|
|
|
|
|
|
|
|
|
def process_query(query):
|
|
|
|
|
"""处理用户查询并生成报告"""
|
|
|
|
|
print(f"正在搜索与'{query}'相关的数据...")
|
|
|
|
|
context = search_related_data(query)
|
|
|
|
|
print(f"找到{len(context.split(chr(10)+chr(10)))}条相关数据")
|
|
|
|
|
|
|
|
|
|
print("正在生成报告...")
|
|
|
|
|
report = generate_report(query, context)
|
|
|
|
|
|
|
|
|
|
return report
|
|
|
|
|
|
|
|
|
|
def search_related_data(query):
|
|
|
|
|
"""搜索向量数据和原始相关数据"""
|
|
|
|
|
"""搜索与查询相关的数据"""
|
|
|
|
|
# 向量搜索
|
|
|
|
|
vector = text_to_embedding(query)
|
|
|
|
|
query_vector = text_to_embedding(query)
|
|
|
|
|
vector_results = es.search(
|
|
|
|
|
index='knowledge_base',
|
|
|
|
|
index=Config.ES_CONFIG['default_index'],
|
|
|
|
|
body={
|
|
|
|
|
"size": 5,
|
|
|
|
|
"query": {
|
|
|
|
|
"script_score": {
|
|
|
|
|
"query": {"match_all": {}},
|
|
|
|
|
"script": {
|
|
|
|
|
"source": "cosineSimilarity(params.query_vector, 'vector') + 1.0",
|
|
|
|
|
"params": {"query_vector": vector}
|
|
|
|
|
"params": {"query_vector": query_vector}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"_source": ["text"]
|
|
|
|
|
"size": 5
|
|
|
|
|
}
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# 文本精确搜索
|
|
|
|
|
text_results = es.search(
|
|
|
|
|
index='raw_texts',
|
|
|
|
|
index="raw_texts",
|
|
|
|
|
body={
|
|
|
|
|
"query": {
|
|
|
|
|
"multi_match": {
|
|
|
|
|
"query": query,
|
|
|
|
|
"fields": ["text"],
|
|
|
|
|
"type": "best_fields"
|
|
|
|
|
"match": {
|
|
|
|
|
"text.keyword": query
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"size": 5
|
|
|
|
@ -54,43 +98,14 @@ def search_related_data(query):
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# 合并结果
|
|
|
|
|
all_results = [hit['_source']['text'] for hit in vector_results['hits']['hits']]
|
|
|
|
|
all_results.extend([hit['_source']['text'] for hit in text_results['hits']['hits']])
|
|
|
|
|
context = ""
|
|
|
|
|
for hit in vector_results['hits']['hits']:
|
|
|
|
|
context += f"向量相似度结果(score={hit['_score']}):\n{hit['_source']['text']}\n\n"
|
|
|
|
|
|
|
|
|
|
return "\n\n".join(all_results)
|
|
|
|
|
|
|
|
|
|
def generate_report(query, context):
|
|
|
|
|
"""使用DeepSeek生成报告"""
|
|
|
|
|
prompt = f"""根据以下关于'{query}'的相关信息,整理一份结构化的报告:
|
|
|
|
|
要求:
|
|
|
|
|
1. 分章节组织内容
|
|
|
|
|
2. 包含关键数据和事实
|
|
|
|
|
3. 语言简洁专业
|
|
|
|
|
|
|
|
|
|
相关信息:
|
|
|
|
|
{context}"""
|
|
|
|
|
for hit in text_results['hits']['hits']:
|
|
|
|
|
context += f"文本精确匹配结果(score={hit['_score']}):\n{hit['_source']['text']}\n\n"
|
|
|
|
|
|
|
|
|
|
response = client.chat.completions.create(
|
|
|
|
|
model="deepseek-chat",
|
|
|
|
|
messages=[
|
|
|
|
|
{"role": "system", "content": "你是一个专业的文档整理助手"},
|
|
|
|
|
{"role": "user", "content": prompt}
|
|
|
|
|
],
|
|
|
|
|
temperature=0.3
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
return response.choices[0].message.content
|
|
|
|
|
|
|
|
|
|
def process_query(query):
|
|
|
|
|
"""处理用户查询并生成报告"""
|
|
|
|
|
print(f"正在搜索与'{query}'相关的数据...")
|
|
|
|
|
context = search_related_data(query)
|
|
|
|
|
print(f"找到{len(context.split(chr(10)+chr(10)))}条相关数据")
|
|
|
|
|
|
|
|
|
|
print("正在生成报告...")
|
|
|
|
|
report = generate_report(query, context)
|
|
|
|
|
|
|
|
|
|
return report
|
|
|
|
|
return context
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
|
#user_query = input("请输入您的查询要求:")
|
|
|
|
|