提交 3324c12d 作者: glide-the

增加 CPU 加载模型逻辑

上级 561c40af
...@@ -130,11 +130,8 @@ class LoaderCheckPoint: ...@@ -130,11 +130,8 @@ class LoaderCheckPoint:
model = dispatch_model(model, device_map=self.device_map) model = dispatch_model(model, device_map=self.device_map)
else: else:
# print(
# "Warning: torch.cuda.is_available() returned False.\nThis means that no GPU has been "
# "detected.\nFalling back to CPU mode.\n")
model = ( model = (
AutoModel.from_pretrained( LoaderClass.from_pretrained(
checkpoint, checkpoint,
config=self.model_config, config=self.model_config,
trust_remote_code=True) trust_remote_code=True)
...@@ -202,7 +199,11 @@ class LoaderCheckPoint: ...@@ -202,7 +199,11 @@ class LoaderCheckPoint:
) from exc ) from exc
# Custom # Custom
else: else:
pass
print(
"Warning: self.llm_device is False.\nThis means that no use GPU bring to be load CPU mode\n")
params = {"low_cpu_mem_usage": True, "torch_dtype": torch.float32, "trust_remote_code": True}
model = LoaderClass.from_pretrained(checkpoint, **params).to(self.llm_device, dtype=float)
# Loading the tokenizer # Loading the tokenizer
if type(model) is transformers.LlamaForCausalLM: if type(model) is transformers.LlamaForCausalLM:
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论