Files
dsProject/dsLightRag/Test/Test100.py
2025-08-14 15:45:08 +08:00

65 lines
2.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# pip install bitsandbytes
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import torch
import threading
from transformers import BitsAndBytesConfig
# Quantization settings handed to the model loader below: weights are loaded
# in 4-bit using the "nf4" quant type with double quantization enabled, and
# bfloat16 is used as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
# Demo question that gets substituted into the user prompt template.
question = "1+1=?"

# The user turn is just the raw question text.
USER_PROMPT_TEMPLATE = "{question}"

# System prompt (kept in Chinese, as sent to the model): it asks the assistant
# to reason first and to wrap the reasoning and the final answer in
# <think> </think> and <answer> </answer> tags respectively.
SYSTEM_PROMPT_TEMPLATE = (
    "用户与助手之间的对话。用户提出一个问题,助手予以解答。"
    "助手先在脑海中思考推理过程,然后为用户提供答案。"
    "推理过程和答案分别用 <think> </think> 和 <answer> </answer> 标签括起来,"
    "即 <think> 这里的推理过程 </think> <answer> 这里的答案 </answer> 。"
)

# Local checkpoint directory used for loading. A previously commented-out
# alternative pointed at the identifier "netease-youdao/Confucius3-Math".
model_name = r"D:\Confucius3-Math\netease-youdao\Confucius3-Math"
# Load the tokenizer and the causal LM from the same checkpoint location.
# The two loads are independent of each other.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# The model is loaded with the quantization config defined above and
# device_map="auto"; no explicit torch_dtype is passed.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)
# Two-turn conversation: the system instructions followed by the user question.
messages = [
    dict(role='system', content=SYSTEM_PROMPT_TEMPLATE),
    dict(role='user', content=USER_PROMPT_TEMPLATE.format(question=question)),
]

# Render the conversation to a single prompt string (not token ids) and append
# the generation prompt so the model continues with the assistant turn.
text = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=False,
)

# Tokenize as a batch of one, returning PyTorch tensors, then move them to the
# device the model reports.
model_inputs = tokenizer([text], return_tensors="pt")
model_inputs = model_inputs.to(model.device)
# Stream the model's reply to stdout while generation runs on a worker thread.
#
# TextIteratorStreamer exposes the decoded text as an iterator: generate()
# feeds it from the worker thread and the loop below consumes it.
# - skip_prompt=True keeps the input prompt (system + user text) from being
#   echoed at the start of the printed stream; only newly generated text
#   is shown.
# - skip_special_tokens=True is forwarded to the tokenizer's decode step.
# - timeout bounds how long the consumer waits for the next chunk, so a
#   generate() call that fails on the worker thread cannot block this loop
#   forever.
streamer = TextIteratorStreamer(
    tokenizer,
    skip_prompt=True,
    skip_special_tokens=True,
    timeout=10.0,
)

# Generation arguments: the tokenized prompt tensors plus the streamer hook.
generation_kwargs = {
    **model_inputs,
    "streamer": streamer,
    "max_new_tokens": 32768,
}

# generate() only returns once generation is finished, so it runs on its own
# thread while the main thread prints chunks as they arrive.
thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

# Print each non-empty chunk immediately, without adding newlines.
print("流式输出开始:")
for chunk in streamer:
    if chunk:
        print(chunk, end="", flush=True)

# Wait for the worker thread to finish before printing the closing banner.
thread.join()
print("\n流式输出结束")