|
|
|
@ -3,7 +3,6 @@ from elasticsearch import Elasticsearch
|
|
|
|
|
|
|
|
|
|
from Config.Config import ES_CONFIG
|
|
|
|
|
from T2_Txt2Vec import text_to_embedding
|
|
|
|
|
from Util.EsMappingUtil import create_vector_index
|
|
|
|
|
|
|
|
|
|
# 初始化ES连接
|
|
|
|
|
es = Elasticsearch(
|
|
|
|
@ -13,6 +12,29 @@ es = Elasticsearch(
|
|
|
|
|
ssl_show_warn=ES_CONFIG["ssl_show_warn"]
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
def check_text_exists(text, index_name="raw_texts"):
    """Search an Elasticsearch index for ``text`` and return the id of the first hit.

    Args:
        text: Value to look up. When its length exceeds 256 only the first
            256 items (characters, for a string) are used in the query.
        index_name: Name of the index to search; defaults to ``"raw_texts"``.

    Returns:
        The ``_id`` of the first hit, or ``None`` when the search yields no
        hits. ``None`` is also returned when any exception is raised during
        the lookup; in that case the error is printed rather than propagated.
    """
    try:
        # Cap the lookup value at 256 before it goes into the query.
        lookup = text[:256] if len(text) > 256 else text

        # Both clauses sit under ``must``: a term query on ``text.keyword``
        # and a match query on ``text``.
        exact_clause = {"term": {"text.keyword": lookup}}
        match_clause = {"match": {"text": lookup}}
        search_body = {
            "query": {
                "bool": {
                    "must": [exact_clause, match_clause]
                }
            }
        }

        response = es.search(index=index_name, body=search_body, size=1)
        hits = response['hits']['hits']
        if not hits:
            return None
        return hits[0]['_id']
    except Exception as e:
        # Best-effort lookup: report the failure and signal "not found".
        print(f"检查失败: {str(e)}")
        return None
|
|
|
|
|
|
|
|
|
|
def save_vector(text, index_name="knowledge_base"):
|
|
|
|
|
"""将文本向量化后保存到ES"""
|
|
|
|
|
try:
|
|
|
|
@ -51,16 +73,26 @@ def save_raw_text(text, index_name="raw_texts"):
|
|
|
|
|
print(f"保存失败: {str(e)}")
|
|
|
|
|
raise
|
|
|
|
|
|
|
|
|
|
def save_text_with_check(text):
    """Save ``text`` only when ``check_text_exists`` does not already find it.

    Args:
        text: The text to look up and, if absent, to save.

    Returns:
        The id returned by ``check_text_exists`` when it is truthy (a message
        is printed in that case). Otherwise a dict with keys ``"vector_id"``
        and ``"raw_id"`` holding the return values of ``save_vector`` and
        ``save_raw_text`` respectively.
    """
    duplicate_id = check_text_exists(text)
    if not duplicate_id:
        # Dict displays evaluate entries in order, so the vector is saved
        # first and the raw text second.
        return {
            "vector_id": save_vector(text),
            "raw_id": save_raw_text(text),
        }

    # Already present: report it and hand back the existing id.
    print(f"文本已存在,ID: {duplicate_id}")
    return duplicate_id
|
|
|
|
|
|
|
|
|
|
# 使用示例
|
|
|
|
|
if __name__ == "__main__":
    # Create the vector index with 200 dimensions.
    create_vector_index(dims=200)

    # Sample text passed to every save step below.
    sample_text = "如何更换支付宝绑定银行卡"

    # Run the steps in order: save the vectorised document, save the raw
    # text, then run the save flow that checks for an existing entry first.
    for save_step in (save_vector, save_raw_text, save_text_with_check):
        save_step(sample_text)
|