init

Test100.py | 65 lines (new file)

@@ -0,0 +1,65 @@
# pip install bitsandbytes
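# device_map="auto" below additionally relies on the accelerate package:
# pip install accelerate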

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
import torch
import threading

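# 4-bit NF4 quantization at load time: weights are stored in 4 bits, matmuls
# run in bfloat16, and double quantization also compresses the per-block
# quantization constants.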
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)
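# With NF4 storage the weight footprint is roughly a quarter of fp16/bf16,
# which is what makes a local single-GPU run practical.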

# model_name = "netease-youdao/Confucius3-Math"  # Hugging Face Hub ID
model_name = r"D:\Confucius3-Math\netease-youdao\Confucius3-Math"  # local snapshot

# System prompt kept in Chinese to match the model's expected format. In English:
# "A conversation between the user and the assistant. The user asks a question
# and the assistant answers it. The assistant first thinks through the reasoning
# process in its head and then provides the answer. The reasoning process and
# the answer are enclosed in <think> </think> and <answer> </answer> tags
# respectively, i.e. <think> reasoning here </think> <answer> answer here </answer>."
SYSTEM_PROMPT_TEMPLATE = """用户与助手之间的对话。用户提出一个问题,助手予以解答。助手先在脑海中思考推理过程,然后为用户提供答案。推理过程和答案分别用 <think> </think> 和 <answer> </answer> 标签括起来,即 <think> 这里的推理过程 </think> <answer> 这里的答案 </answer> 。"""

USER_PROMPT_TEMPLATE = """{question}"""
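# The tags let downstream code separate the chain of thought from the final
# answer (see the helper sketch at the end of this file).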

question = "1+1=?"

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # torch_dtype="auto",  # left disabled; bnb_4bit_compute_dtype governs the compute dtype
    quantization_config=bnb_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
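# device_map="auto" lets accelerate place layers on the available devices;
# inputs therefore need an explicit .to(model.device), as done below.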
messages = [
    {'role': 'system', 'content': SYSTEM_PROMPT_TEMPLATE},
    {'role': 'user', 'content': USER_PROMPT_TEMPLATE.format(question=question)},
]
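# Render the chat into the model's prompt format: tokenize=False returns the
# formatted string, and add_generation_prompt=True appends the assistant-turn
# header so the model continues as the assistant.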
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

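# TextIteratorStreamer exposes generated text as a blocking iterator: the
# generate() call (on a worker thread) pushes decoded chunks into a queue, and
# iterating pops them, waiting up to `timeout` seconds per chunk. Note that
# without skip_prompt=True the prompt text is echoed before the new tokens.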
# Create the streaming output iterator
streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True, timeout=10.0)

# Set generation parameters, attaching the streamer
generation_kwargs = {
    **model_inputs,
    "streamer": streamer,
    "max_new_tokens": 32768,
}
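# generate() blocks until generation finishes, so it runs on a worker thread;
# the main thread stays free to drain the streamer as tokens arrive.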
# Start the generation thread
thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

# Stream the output as it is produced
print("Streaming output started:")
for chunk in streamer:
    if chunk:
        print(chunk, end="", flush=True)

thread.join()
print("\nStreaming output finished")
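
# Optional follow-up, not in the original commit: a minimal sketch for
# splitting the reasoning from the final answer, assuming the model emits
# well-formed <think> / <answer> tags as the system prompt instructs.
# `collected` stands for the accumulated stream (e.g. the chunks appended
# inside the loop above).
import re

def split_reasoning(collected: str) -> tuple[str, str]:
    """Return (reasoning, answer) extracted from tagged model output."""
    think = re.search(r"<think>(.*?)</think>", collected, re.DOTALL)
    answer = re.search(r"<answer>(.*?)</answer>", collected, re.DOTALL)
    return (
        think.group(1).strip() if think else "",
        answer.group(1).strip() if answer else collected.strip(),
    )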