From 52369e2b21fc4e833a9b9051c5af026774b6ece9 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Tue, 19 Aug 2025 08:16:58 +0800 Subject: [PATCH] 'commit' --- dsSchoolBuddy/ElasticSearch/T3_InsertData.py | 37 +++----------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/dsSchoolBuddy/ElasticSearch/T3_InsertData.py b/dsSchoolBuddy/ElasticSearch/T3_InsertData.py index f1b63655..11d964fe 100644 --- a/dsSchoolBuddy/ElasticSearch/T3_InsertData.py +++ b/dsSchoolBuddy/ElasticSearch/T3_InsertData.py @@ -1,13 +1,14 @@ -import warnings import os import time -from Config import Config +import warnings + from elasticsearch import Elasticsearch from langchain_core.documents import Document -from Util.VectorUtil import text_to_vector_db # 导入向量化工具函数 from langchain_openai import OpenAIEmbeddings # 直接导入嵌入模型 from pydantic import SecretStr # 用于包装API密钥 +from Config import Config + # 抑制HTTPS相关警告 warnings.filterwarnings('ignore', message='Connecting to .* using TLS with verify_certs=False is insecure') warnings.filterwarnings('ignore', message='Unverified HTTPS request is being made to host') @@ -96,36 +97,6 @@ def insert_long_text_to_es(long_text: str, tags: list = None) -> bool: return False -def process_text_directory(txt_dir: str) -> None: - """ - 处理指定目录下的所有文本文件,将其向量化并插入到Elasticsearch - - 参数: - txt_dir: 包含文本文件的目录路径 - """ - for filename in os.listdir(txt_dir): - if filename.endswith('.txt'): - filepath = os.path.join(txt_dir, filename) - with open(filepath, 'r', encoding='utf-8') as f: - full_content = f.read() - - if not full_content: - print(f"跳过空文件: {filename}") - continue - - print(f"正在处理文件: {filename}") - - # 提取标签 - x = filename.split("_") - if len(x) >= 2: - selected_tags = [x[0] + "_" + x[1]] - else: - selected_tags = ["uncategorized"] - - # 插入文本 - insert_long_text_to_es(full_content, selected_tags) - - def main(): # 示例1:插入单个长文本 long_text = """混凝土是一种广泛使用的建筑材料,由水泥、砂、石子和水混合而成。它具有高强度、耐久性和良好的可塑性,被广泛应用于建筑、桥梁、道路等土木工程领域。