'commit'
This commit is contained in:
@@ -1,13 +1,14 @@
|
|||||||
import warnings
|
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from Config import Config
|
import warnings
|
||||||
|
|
||||||
from elasticsearch import Elasticsearch
|
from elasticsearch import Elasticsearch
|
||||||
from langchain_core.documents import Document
|
from langchain_core.documents import Document
|
||||||
from Util.VectorUtil import text_to_vector_db # 导入向量化工具函数
|
|
||||||
from langchain_openai import OpenAIEmbeddings # 直接导入嵌入模型
|
from langchain_openai import OpenAIEmbeddings # 直接导入嵌入模型
|
||||||
from pydantic import SecretStr # 用于包装API密钥
|
from pydantic import SecretStr # 用于包装API密钥
|
||||||
|
|
||||||
|
from Config import Config
|
||||||
|
|
||||||
# 抑制HTTPS相关警告
|
# 抑制HTTPS相关警告
|
||||||
warnings.filterwarnings('ignore', message='Connecting to .* using TLS with verify_certs=False is insecure')
|
warnings.filterwarnings('ignore', message='Connecting to .* using TLS with verify_certs=False is insecure')
|
||||||
warnings.filterwarnings('ignore', message='Unverified HTTPS request is being made to host')
|
warnings.filterwarnings('ignore', message='Unverified HTTPS request is being made to host')
|
||||||
@@ -96,36 +97,6 @@ def insert_long_text_to_es(long_text: str, tags: list = None) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def _tags_from_filename(filename: str) -> list:
    """Build the single-element tag list for a text file from its file name."""
    parts = filename.split("_")
    if len(parts) >= 2:
        # The tag is the first two underscore-separated fields, rejoined.
        # NOTE(review): when the name contains exactly one underscore the
        # second field still carries the ".txt" suffix -- confirm that this
        # is the intended tag format before changing it.
        return [parts[0] + "_" + parts[1]]
    return ["uncategorized"]


def process_text_directory(txt_dir: str) -> None:
    """
    Process every ``.txt`` file in a directory: read it and pass its content,
    together with a tag derived from the file name, to
    ``insert_long_text_to_es``.

    Files are visited in sorted name order so repeated runs behave the same.
    Entries that are not regular files, and files that are empty or contain
    only whitespace, are skipped.

    Args:
        txt_dir: Path of the directory that contains the text files.

    Returns:
        None. Progress, skipped files and failed insertions are reported on
        standard output.
    """
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(txt_dir)):
        if not filename.endswith('.txt'):
            continue

        filepath = os.path.join(txt_dir, filename)
        if not os.path.isfile(filepath):
            # A directory named "*.txt" would make open() raise.
            continue

        with open(filepath, 'r', encoding='utf-8') as f:
            full_content = f.read()

        # Whitespace-only content has nothing to vectorize, treat it as empty.
        if not full_content.strip():
            print(f"跳过空文件: {filename}")
            continue

        print(f"正在处理文件: {filename}")

        # Derive the tag from the file name.
        selected_tags = _tags_from_filename(filename)

        # Insert the text; a falsy result is reported instead of being
        # silently dropped, and the remaining files are still processed.
        if not insert_long_text_to_es(full_content, selected_tags):
            print(f"文件插入失败: {filename}")
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
# 示例1:插入单个长文本
|
# 示例1:插入单个长文本
|
||||||
long_text = """混凝土是一种广泛使用的建筑材料,由水泥、砂、石子和水混合而成。它具有高强度、耐久性和良好的可塑性,被广泛应用于建筑、桥梁、道路等土木工程领域。
|
long_text = """混凝土是一种广泛使用的建筑材料,由水泥、砂、石子和水混合而成。它具有高强度、耐久性和良好的可塑性,被广泛应用于建筑、桥梁、道路等土木工程领域。
|
||||||
|
Reference in New Issue
Block a user