diff --git a/dsRag/T5_SentenceSave.py b/dsRag/T5_SentenceSave.py index 92198c03..8363d826 100644 --- a/dsRag/T5_SentenceSave.py +++ b/dsRag/T5_SentenceSave.py @@ -1,6 +1,17 @@ +import logging from datetime import datetime from elasticsearch import Elasticsearch +# 配置日志 +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + handlers=[ + logging.FileHandler('sentence_save.log'), + logging.StreamHandler() + ] +) + from Config.Config import ES_CONFIG from T2_Txt2Vec import text_to_embedding @@ -38,22 +49,20 @@ def check_text_exists(text, index_name="raw_texts"): def save_vector(text, index_name="knowledge_base"): """将文本向量化后保存到ES""" try: - # 向量化文本 + logging.info(f"开始向量化文本: {text[:50]}...") vector = text_to_embedding(text) - # 准备文档 doc = { "text": text, "vector": vector, "timestamp": datetime.now() } - # 保存到ES res = es.index(index=index_name, document=doc) - print(f"向量化文档已保存,ID: {res['_id']}") + logging.info(f"成功保存向量化文档到{index_name}, ID: {res['_id']}, 文本长度: {len(text)}") return res['_id'] except Exception as e: - print(f"保存失败: {str(e)}") + logging.error(f"向量保存失败: {str(e)}", exc_info=True) raise def save_raw_text(text, index_name="raw_texts"):