|
|
|
@ -6,6 +6,9 @@ import jieba
|
|
|
|
|
import os
|
|
|
|
|
import time
|
|
|
|
|
|
|
|
|
|
# Tags to apply to the records being labelled
|
|
|
|
|
selectedTags = [
    "MATH_DATA_2",
    "小学数学",  # "elementary school mathematics"
]
|
|
|
|
|
|
|
|
|
|
# 1. Load the pre-trained Word2Vec model
|
|
|
|
|
# binary=False reads the plain-text word2vec format; limit caps how many
# vectors are read from the file.
model_path = MS_MODEL_PATH
model = KeyedVectors.load_word2vec_format(
    model_path,
    binary=False,
    limit=MS_MODEL_LIMIT,
)
|
|
|
|
@ -49,7 +52,7 @@ for filename in os.listdir(txt_dir):
|
|
|
|
|
|
|
|
|
|
# 5. Get the current time and session ID
|
|
|
|
|
# Current local time as "YYYY-MM-DD HH:MM:SS"; strftime() falls back to
# localtime() when no time tuple is given.
timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
|
|
# Metadata for this record: the shared tag list plus the complete text.
# The former hard-coded ["MATH_DATA_1", "小学数学"] assignment was a dead store
# (overwritten on the very next statement), so only the selectedTags form is kept.
tags = {"tags": selectedTags, "full_content": full_content}  # include the full content
|
|
|
|
|
|
|
|
|
|
# 6. Convert the first line of text into an embedding vector
|
|
|
|
|
# text_to_embedding is defined outside this excerpt; presumably it returns a
# vector for first_line — TODO confirm against its definition.
embedding = text_to_embedding(first_line)
|
|
|
|
|