main
HuangHai 1 month ago
parent 1b38446d5f
commit f8132a45f2

@ -1,6 +1,17 @@
import logging
from datetime import datetime
from elasticsearch import Elasticsearch
# Configure root logging: records at INFO and above are sent both to the
# 'sentence_save.log' file and to the console stream.
_log_handlers = [
    logging.FileHandler('sentence_save.log'),
    logging.StreamHandler(),
]
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=_log_handlers,
)
from Config.Config import ES_CONFIG
from T2_Txt2Vec import text_to_embedding
@ -38,22 +49,20 @@ def check_text_exists(text, index_name="raw_texts"):
def save_vector(text, index_name="knowledge_base"):
    """Embed ``text`` and index it as a document in Elasticsearch.

    The stored document contains the original text, the embedding returned
    by ``text_to_embedding``, and the current time from ``datetime.now()``.

    Args:
        text: Text to embed and store. It is sliced and measured with
            ``len`` for logging, so a string is expected.
        index_name: Name of the index the document is written to.

    Returns:
        The ``_id`` field of the response returned by ``es.index``.

    Raises:
        Exception: Any error raised while embedding or indexing is logged
            together with its traceback and then re-raised unchanged.
    """
    try:
        # Log only a short preview so long inputs do not flood the log.
        logging.info("开始向量化文本: %s...", text[:50])
        vector = text_to_embedding(text)

        doc = {
            "text": text,
            "vector": vector,
            "timestamp": datetime.now(),
        }

        # NOTE(review): `es` is not defined in this block; presumably the
        # module-level Elasticsearch client -- confirm.
        res = es.index(index=index_name, document=doc)
        doc_id = res['_id']

        # Status is reported through logging only: the root logger already
        # has a console StreamHandler, so an extra print() would emit every
        # message twice.
        logging.info(
            "成功保存向量化文档到%s, ID: %s, 文本长度: %s",
            index_name,
            doc_id,
            len(text),
        )
        return doc_id
    except Exception as e:
        # logging.exception logs at ERROR level and attaches the traceback.
        logging.exception("向量保存失败: %s", e)
        raise
def save_raw_text(text, index_name="raw_texts"):

Loading…
Cancel
Save