|
|
@ -2,7 +2,6 @@ import datetime
|
|
|
|
import logging
|
|
|
|
import logging
|
|
|
|
|
|
|
|
|
|
|
|
from elasticsearch import Elasticsearch
|
|
|
|
from elasticsearch import Elasticsearch
|
|
|
|
from tqdm import tqdm
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from Config.Config import ES_CONFIG
|
|
|
|
from Config.Config import ES_CONFIG
|
|
|
|
from Util.EmbeddingUtil import text_to_embedding
|
|
|
|
from Util.EmbeddingUtil import text_to_embedding
|
|
|
@ -51,7 +50,7 @@ def process_file(file_path):
|
|
|
|
|
|
|
|
|
|
|
|
paragraphs = split_paragraphs(content)
|
|
|
|
paragraphs = split_paragraphs(content)
|
|
|
|
|
|
|
|
|
|
|
|
for paragraph in tqdm(paragraphs, desc='处理进度', unit='段'):
|
|
|
|
for paragraph in paragraphs:
|
|
|
|
save_to_es(paragraph)
|
|
|
|
save_to_es(paragraph)
|
|
|
|
|
|
|
|
|
|
|
|
print(f"\n处理完成,共保存{len(paragraphs)}个段落")
|
|
|
|
print(f"\n处理完成,共保存{len(paragraphs)}个段落")
|
|
|
|