@@ -1,11 +1,11 @@
import datetime
import logging
from Util.EmbeddingUtil import text_to_embedding  # updated import
from Config.Config import ES_CONFIG
from elasticsearch import Elasticsearch
import re
from tqdm import tqdm
# Logger configuration (added at the top of the file)
logger = logging.getLogger(__name__)
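
Between this hunk and the next, the diff skips the handler and formatter setup; a sketch of those elided lines, assuming a plain StreamHandler at INFO level (only the Formatter pattern is actually visible, in the next hunk header):

logger.setLevel(logging.INFO)      # level is an assumption
handler = logging.StreamHandler()  # handler type is an assumption
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')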

@@ -17,13 +17,14 @@ formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)


def split_sentences(text):
    """Split the text into sentences."""
    # Split on Chinese sentence-ending punctuation and on newlines
    sentences = re.split(r'[。!?;\n]', text)
    return [s.strip() for s in sentences if s.strip()]


def split_paragraphs(text):
    """Split the text into paragraphs."""
    # Paragraphs are separated by blank lines (two consecutive newlines)
    paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
    return paragraphs
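
A quick check of the two helpers (the sample string is invented for illustration):

sample = "第一段第一句。第一句之后的第二句!\n\n第二段只有一句?"
print(split_sentences(sample))   # ['第一段第一句', '第一句之后的第二句', '第二段只有一句']
print(split_paragraphs(sample))  # ['第一段第一句。第一句之后的第二句!', '第二段只有一句?']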

# save_to_es is defined before process_file so that process_file can call it
def save_to_es(text):
    """Embed the text and save both the vector and the raw text to ES."""
    vector = text_to_embedding(text)
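
The next hunk resumes inside save_to_es at its except clause; the lines skipped here presumably build the ES document and open the matching try (closed by the except below). A sketch of what they might contain, assuming the 8.x elasticsearch-py client; the index name and field names are guesses, and es is the client created under __main__:

    try:
        doc = {
            'text': text,      # assumed field name for the raw text
            'vector': vector,  # assumed field name for the embedding
            'timestamp': datetime.datetime.now().isoformat()  # plausible use of the datetime import
        }
        es.index(index=ES_CONFIG['index_name'], document=doc)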

@@ -45,18 +46,18 @@ def save_to_es(text):
    except Exception as e:
        logger.error(f"Failed to save text to ES: {e}")
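
For the saved vectors to support similarity search, the target index presumably carries a dense_vector mapping. A one-off setup sketch; the index name, field names, and dimension are assumptions and must match what text_to_embedding returns:

es.indices.create(index=ES_CONFIG['index_name'], mappings={
    'properties': {
        'text':      {'type': 'text'},
        'vector':    {'type': 'dense_vector', 'dims': 1024},  # dims: assumed embedding size
        'timestamp': {'type': 'date'}
    }
})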

def process_file(file_path):
    """Process a text file: split it into sentences and paragraphs and save each chunk to ES."""
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    sentences = split_sentences(content)
    paragraphs = split_paragraphs(content)

    # Progress bars for the sentence and paragraph passes
    for sentence in tqdm(sentences, desc='Indexing sentences', unit='sentence'):
        save_to_es(sentence)
    for paragraph in tqdm(paragraphs, desc='Indexing paragraphs', unit='paragraph'):
        save_to_es(paragraph)

    print(f"\nDone: {len(sentences)} sentences saved")
    print(f"\nDone: {len(paragraphs)} paragraphs saved")

if __name__ == '__main__':
    es = Elasticsearch(
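        # The diff ends mid-call; a sketch of a possible completion follows. Every
        # ES_CONFIG key and the input path are assumptions, not shown in the diff.
        hosts=ES_CONFIG['hosts'],
        basic_auth=(ES_CONFIG['user'], ES_CONFIG['password']),
        verify_certs=ES_CONFIG.get('verify_certs', True),
    )
    process_file('data.txt')  # hypothetical input path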