Merge pull request #17 from myml/memory

fix: 修复 ChatGLM 模型被复制导致显存占用过多的问题

Merge pull request #17 from myml/memory
fix: 修复 ChatGLM 模型被复制导致显存占用过多的问题
63d90060 · imClumsyPanda · GitHub · 51c44e3e · bed03a6f · 63d90060
--- a/chatglm_llm.py
+++ b/chatglm_llm.py
@@ -15,6 +15,8 @@ model = (
    .cuda()
 )
+tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
+model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
 class ChatGLM(LLM):
    max_token: int = 10000