main
HuangHai 1 month ago
parent a6a04529ec
commit 1b38446d5f

@ -3,7 +3,6 @@ from elasticsearch import Elasticsearch
from Config.Config import ES_CONFIG from Config.Config import ES_CONFIG
from T2_Txt2Vec import text_to_embedding from T2_Txt2Vec import text_to_embedding
from Util.EsMappingUtil import create_vector_index
# 初始化ES连接 # 初始化ES连接
es = Elasticsearch( es = Elasticsearch(
@ -13,6 +12,29 @@ es = Elasticsearch(
ssl_show_warn=ES_CONFIG["ssl_show_warn"] ssl_show_warn=ES_CONFIG["ssl_show_warn"]
) )
def check_text_exists(text, index_name="raw_texts"):
    """Return the ``_id`` of a stored document whose text equals ``text``.

    Short texts are looked up exactly through the ``text.keyword`` sub-field.
    Texts longer than the keyword limit cannot be found that way (the keyword
    sub-field is declared with ``ignore_above: 256``, so such values are not
    indexed as keywords); for those a full-text query fetches a few candidate
    documents and the stored text is compared in Python.

    Args:
        text: The text to look for.
        index_name: Index to search. Defaults to ``"raw_texts"``.

    Returns:
        The ``_id`` of a matching hit, or ``None`` when nothing matches or
        when the lookup fails for any reason (the failure is printed, not
        raised).
    """
    # Must stay in sync with ``ignore_above`` on the ``text.keyword`` mapping.
    keyword_limit = 256
    # Number of full-text candidates inspected for long texts (best effort).
    candidate_limit = 10
    try:
        if len(text) <= keyword_limit:
            # Exact lookup. A filter clause is enough: no scoring is needed,
            # and an additional ``match`` clause would wrongly reject texts
            # that analyze to zero tokens (e.g. punctuation only).
            query = {
                "query": {
                    "bool": {
                        "filter": [
                            {"term": {"text.keyword": text}}
                        ]
                    }
                }
            }
            res = es.search(index=index_name, body=query, size=1)
            hits = res['hits']['hits']
            return hits[0]['_id'] if hits else None

        # Long text: a term query on a truncated prefix would miss the real
        # duplicate and could report an unrelated document whose whole text
        # happens to equal that prefix. Use the prefix only to find
        # candidates, then require the full text to be identical.
        query = {
            "query": {
                "match": {"text": text[:keyword_limit]}
            }
        }
        res = es.search(index=index_name, body=query, size=candidate_limit)
        for hit in res['hits']['hits']:
            # NOTE(review): assumes documents keep the original string under
            # ``_source['text']`` -- confirm against save_raw_text.
            if (hit.get('_source') or {}).get('text') == text:
                return hit['_id']
        return None
    except Exception as e:
        # Best-effort check: report the problem and treat it as "not found".
        print(f"检查失败: {str(e)}")
        return None
def save_vector(text, index_name="knowledge_base"): def save_vector(text, index_name="knowledge_base"):
"""将文本向量化后保存到ES""" """将文本向量化后保存到ES"""
try: try:
@ -51,16 +73,26 @@ def save_raw_text(text, index_name="raw_texts"):
print(f"保存失败: {str(e)}") print(f"保存失败: {str(e)}")
raise raise
def save_text_with_check(text):
    """Store ``text`` unless an identical text is already indexed.

    Args:
        text: The text to store.

    Returns:
        The value returned by ``check_text_exists`` when it is truthy (the
        text is treated as already stored and nothing is written); otherwise
        a dict ``{"vector_id": ..., "raw_id": ...}`` holding the results of
        ``save_vector`` and ``save_raw_text``.
    """
    duplicate_id = check_text_exists(text)
    if not duplicate_id:
        # Nothing found: write the vector first, then the raw text.
        stored_vector_id = save_vector(text)
        stored_raw_id = save_raw_text(text)
        return {"vector_id": stored_vector_id, "raw_id": stored_raw_id}

    # Already present: report it and hand back the existing id.
    print(f"文本已存在ID: {duplicate_id}")
    return duplicate_id
# 使用示例 # 使用示例
if __name__ == "__main__": if __name__ == "__main__":
# 创建向量索引
create_vector_index(dims=200)
# 示例文本 # 示例文本
sample_text = "如何更换支付宝绑定银行卡" sample_text = "如何更换支付宝绑定银行卡"
# 保存向量化文档 # 带检查的保存
save_vector(sample_text) save_text_with_check(sample_text)
# 保存原始文本
save_raw_text(sample_text)

@ -71,8 +71,12 @@ def create_text_index(index_name="raw_texts"):
"properties": { "properties": {
"text": { "text": {
"type": "text", "type": "text",
"analyzer": "ik_max_word", "fields": {
"search_analyzer": "ik_smart" "keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}, },
"timestamp": { "timestamp": {
"type": "date" "type": "date"

Loading…
Cancel
Save