You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

67 lines
1.8 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

from datetime import datetime
from elasticsearch import Elasticsearch
from Config.Config import ES_CONFIG
from T2_Txt2Vec import text_to_embedding
from Util.EsMappingUtil import create_vector_index
# Initialise the shared Elasticsearch client; every connection option is read
# from ES_CONFIG under the same key name that the client constructor expects.
es = Elasticsearch(
    **{
        option: ES_CONFIG[option]
        for option in ("hosts", "basic_auth", "verify_certs", "ssl_show_warn")
    }
)
def save_vector(text, index_name="knowledge_base"):
    """Embed ``text`` and index it, together with its vector, into Elasticsearch.

    Args:
        text: The text to embed and store.
        index_name: Name of the target index; defaults to ``"knowledge_base"``.

    Returns:
        The ``_id`` of the indexed document, taken from the index response.

    Raises:
        Exception: Any error raised while embedding or indexing is printed
            and then re-raised unchanged.
    """
    try:
        # Turn the text into its embedding vector.
        vector = text_to_embedding(text)
        doc = {
            "text": text,
            "vector": vector,
            # Timezone-aware on purpose: a naive local time carries no UTC
            # offset, and Elasticsearch treats offset-less dates as UTC, which
            # shifts the stored instant on any host not running in UTC.
            "timestamp": datetime.now().astimezone(),
        }
        res = es.index(index=index_name, document=doc)
        print(f"向量化文档已保存ID: {res['_id']}")
        return res['_id']
    except Exception as e:
        # Report the failure, then let the caller decide how to handle it.
        print(f"保存失败: {e}")
        raise
def save_raw_text(text, index_name="raw_texts"):
    """Index ``text`` as-is (no embedding) into Elasticsearch.

    Args:
        text: The raw text to store.
        index_name: Name of the target index; defaults to ``"raw_texts"``.

    Returns:
        The ``_id`` of the indexed document, taken from the index response.

    Raises:
        Exception: Any error raised while indexing is printed and then
            re-raised unchanged.
    """
    try:
        doc = {
            "text": text,
            # Timezone-aware on purpose: a naive local time carries no UTC
            # offset, and Elasticsearch treats offset-less dates as UTC, which
            # shifts the stored instant on any host not running in UTC.
            "timestamp": datetime.now().astimezone(),
        }
        res = es.index(index=index_name, document=doc)
        print(f"原始文本已保存ID: {res['_id']}")
        return res['_id']
    except Exception as e:
        # Report the failure, then let the caller decide how to handle it.
        print(f"保存失败: {e}")
        raise
# Usage example
if __name__ == "__main__":
    # Create the vector index with dims=200 before writing any document.
    create_vector_index(dims=200)

    # Sample text, stored first as an embedded document and then as raw text.
    demo_text = "如何更换支付宝绑定银行卡"
    for store in (save_vector, save_raw_text):
        store(demo_text)