diff --git a/dsSchoolBuddy/ElasticSearch/T1_RebuildMapping.py b/dsSchoolBuddy/ElasticSearch/T1_RebuildMapping.py index 075c8263..a587e62c 100644 --- a/dsSchoolBuddy/ElasticSearch/T1_RebuildMapping.py +++ b/dsSchoolBuddy/ElasticSearch/T1_RebuildMapping.py @@ -1,8 +1,7 @@ from Config import Config from ElasticSearch.Utils.EsSearchUtil import EsSearchUtil, disableWarning -# 禁用警告 -disableWarning() + # 创建EsSearchUtil实例 search_util = EsSearchUtil(Config.ES_CONFIG) diff --git a/dsSchoolBuddy/ElasticSearch/Utils/EsSearchUtil.py b/dsSchoolBuddy/ElasticSearch/Utils/EsSearchUtil.py index 9be86ef5..c93039b6 100644 --- a/dsSchoolBuddy/ElasticSearch/Utils/EsSearchUtil.py +++ b/dsSchoolBuddy/ElasticSearch/Utils/EsSearchUtil.py @@ -18,9 +18,6 @@ def disableWarning(): warnings.filterwarnings('ignore', message='Connecting to .* using TLS with verify_certs=False is insecure') warnings.filterwarnings('ignore', message='Unverified HTTPS request is being made to host') -# 初始化配置 -disableWarning() - # 初始化日志 logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -31,6 +28,8 @@ class EsSearchUtil: 初始化Elasticsearch搜索工具 :param es_config: Elasticsearch配置字典,包含hosts, username, password, index_name等 """ + # 禁用警告 + disableWarning() self.es_config = es_config # 初始化连接池 @@ -154,16 +153,15 @@ def insert_long_text_to_es(long_text: str, tags: list = None) -> bool: bool: 插入是否成功 """ try: - # 1. 初始化Elasticsearch连接 - es = Elasticsearch( - hosts=Config.ES_CONFIG['hosts'], - basic_auth=Config.ES_CONFIG['basic_auth'], - verify_certs=False - ) + # 1. 创建EsSearchUtil实例以使用连接池 + search_util = EsSearchUtil(Config.ES_CONFIG) + + # 2. 从连接池获取连接 + conn = search_util.es_pool.get_connection() - # 2. 检查索引是否存在,不存在则创建 + # 3. 检查索引是否存在,不存在则创建 index_name = Config.ES_CONFIG['index_name'] - if not es.indices.exists(index=index_name): + if not conn.indices.exists(index=index_name): # 定义mapping结构 mapping = { "mappings": { @@ -186,33 +184,33 @@ def insert_long_text_to_es(long_text: str, tags: list = None) -> bool: } } } - es.indices.create(index=index_name, body=mapping) + conn.indices.create(index=index_name, body=mapping) print(f"索引 '{index_name}' 创建成功") - # 3. 切割文本 + # 4. 切割文本 text_chunks = split_text_into_chunks(long_text) - # 4. 准备标签 + # 5. 准备标签 if tags is None: tags = ["general_text"] - # 5. 获取当前时间 + # 6. 获取当前时间 timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) - # 6. 创建嵌入模型 + # 7. 创建嵌入模型 embeddings = OpenAIEmbeddings( model=Config.EMBED_MODEL_NAME, base_url=Config.EMBED_BASE_URL, api_key=SecretStr(Config.EMBED_API_KEY) ) - # 7. 为每个文本块生成向量并插入 + # 8. 为每个文本块生成向量并插入 for i, chunk in enumerate(text_chunks): # 生成文本块的哈希值作为文档ID doc_id = hashlib.md5(chunk.encode('utf-8')).hexdigest() # 检查文档是否已存在 - if es.exists(index=index_name, id=doc_id): + if conn.exists(index=index_name, id=doc_id): print(f"文档块 {i+1} 已存在,跳过插入: {doc_id}") continue @@ -228,13 +226,17 @@ def insert_long_text_to_es(long_text: str, tags: list = None) -> bool: } # 插入数据到Elasticsearch - es.index(index=index_name, id=doc_id, document=doc) + conn.index(index=index_name, id=doc_id, document=doc) print(f"文档块 {i+1} 插入成功: {doc_id}") return True except Exception as e: print(f"插入数据失败: {e}") return False + finally: + # 确保释放连接回连接池 + if 'conn' in locals() and 'search_util' in locals(): + search_util.es_pool.release_connection(conn) diff --git a/dsSchoolBuddy/ElasticSearch/Utils/__pycache__/EsSearchUtil.cpython-310.pyc b/dsSchoolBuddy/ElasticSearch/Utils/__pycache__/EsSearchUtil.cpython-310.pyc index 8fa38d14..eefb7dbd 100644 Binary files a/dsSchoolBuddy/ElasticSearch/Utils/__pycache__/EsSearchUtil.cpython-310.pyc and b/dsSchoolBuddy/ElasticSearch/Utils/__pycache__/EsSearchUtil.cpython-310.pyc differ