A CPU Inference Solution for ChatGLM

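The snippet below loads the INT4-quantized ChatGLM2-6B checkpoint and runs it entirely on the CPU. The model is cast to float32, since PyTorch's CPU kernels do not generally support half-precision inference, and the INT4 weights keep the memory footprint considerably smaller than the full-precision checkpoint.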
from transformers import AutoTokenizer, AutoModel

# trust_remote_code is required because ChatGLM ships its model code with the checkpoint.
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b-int4", trust_remote_code=True)
# Cast to float32 for CPU inference; half precision is not used on the CPU.
model = AutoModel.from_pretrained("THUDM/chatglm2-6b-int4", trust_remote_code=True).float()
model = model.eval()

# Single-turn chat; the prompt asks for "a modern poem in praise of snow".
response, history = model.chat(tokenizer, "写一首赞美雪的现代诗", history=[])
print(response)
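
Because chat returns the updated history, a follow-up turn can pass it back in to keep the conversation context. Below is a minimal sketch continuing the session above; the follow-up prompt is only an illustrative example.

# Multi-turn chat: reuse the history returned by the previous call so the model
# sees the earlier exchange. This prompt asks it to shorten the poem to four lines.
response, history = model.chat(tokenizer, "把这首诗缩写成四行", history=history)
print(response)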