main
HuangHai 4 weeks ago
parent 8a8028e223
commit 68640c53ce

@@ -9,9 +9,7 @@ from Util.SplitDocxUtil import SplitDocxUtil
# Load the pretrained model and tokenizer
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
#model = AutoModel.from_pretrained('bert-base-uncased')
model = LlamaForCausalLM.from_pretrained("./path/to/local/directory", local_files_only=True)
model = LlamaForCausalLM.from_pretrained("d:/Model/google-bert/bert-base-uncased", local_files_only=True)
def split_into_blocks(text):
"""使用正则表达式匹配问题和话题的标题及内容"""

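For context, a minimal sketch of what split_into_blocks might look like given that docstring; the 问题/话题 heading format and the re.split pattern are assumptions for illustration, not part of this commit:

import re

def split_into_blocks(text):
    """Use regular expressions to match the titles and contents of questions and topics"""
    # Assumed heading format: lines such as "问题1：..." or "话题2：..." (not confirmed by this diff)
    pattern = r'(问题\d+[:：]|话题\d+[:：])'
    parts = re.split(pattern, text)
    # re.split keeps the captured headings; pair each heading with the body that follows it
    blocks = []
    for i in range(1, len(parts) - 1, 2):
        blocks.append((parts[i].rstrip(':：'), parts[i + 1].strip()))
    return blocks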
@@ -1,27 +0,0 @@
"""
pip install huggingface_hub
pip install pysocks
pip install hf_xet
After turning on the VPN, use Python to download the model
"""
import os
from transformers import AutoModel, AutoTokenizer
# Set proxy environment variables
os.environ['HTTP_PROXY'] = 'socks5://127.0.0.1:1080'
os.environ['HTTPS_PROXY'] = 'socks5://127.0.0.1:1080'
# Configure the proxy for the download calls
proxies = {
'http': 'socks5://127.0.0.1:1080',
'https': 'socks5://127.0.0.1:1080'
}
# Download and load the model
model_id = "google-bert/bert-base-uncased"
model = AutoModel.from_pretrained(model_id, proxies=proxies)
tokenizer = AutoTokenizer.from_pretrained(model_id, proxies=proxies)
# Save the model and tokenizer locally
model.save_pretrained("d:/Model/google-bert/bert-base-uncased")
tokenizer.save_pretrained("d:/Model/google-bert/bert-base-uncased")
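Alternatively, since the deleted script already installs huggingface_hub, the whole repository could be mirrored in a single call; a minimal sketch under the same proxy assumptions (snapshot_download accepts a proxies dict, like from_pretrained does):

from huggingface_hub import snapshot_download

proxies = {
    'http': 'socks5://127.0.0.1:1080',
    'https': 'socks5://127.0.0.1:1080'
}
# Mirror the full repo (weights, config, tokenizer files) into the local directory
snapshot_download(
    repo_id="google-bert/bert-base-uncased",
    local_dir="d:/Model/google-bert/bert-base-uncased",
    proxies=proxies,
)

The directory produced this way can then be consumed offline by the from_pretrained(..., local_files_only=True) call in the first hunk.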