'commit'

4 weeks ago · 8e7f71181c
parent e544a5c1e8
commit 8e7f71181c
1 changed file with 6 additions and 3 deletions
--- a/dsRag/Milvus/X4_InsertData.py
+++ b/dsRag/Milvus/X4_InsertData.py
@@ -6,6 +6,9 @@ import jieba
 import os
 import time

+# 需要进行标记的标签
+selectedTags = ["MATH_DATA_2", "小学数学"]
+
 # 1. 加载预训练的 Word2Vec 模型
 model_path = MS_MODEL_PATH
 model = KeyedVectors.load_word2vec_format(model_path, binary=False, limit=MS_MODEL_LIMIT)
@@ -40,16 +43,16 @@ for filename in os.listdir(txt_dir):
            first_line = f.readline().strip()
            # 读取全部内容用于后续查询
            full_content = first_line + '\n' + f.read()
-            
+
            if not first_line:
                print(f"跳过空文件: {filename}")
                continue
-                
+
            print(f"正在处理文件: {filename}")

            # 5. 获取当前时间和会话ID
            timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
-            tags = {"tags": ["MATH_DATA_1", "小学数学"], "full_content": full_content} # 添加完整内容
+            tags = {"tags": selectedTags, "full_content": full_content}  # 添加完整内容

            # 6. 将第一行文本转换为嵌入向量
            embedding = text_to_embedding(first_line)