Commit 99440167 Author: glide-the

Check dependencies for extended features and prompt for their installation

Parent 1c5f71be
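Every change in this commit applies the same pattern: imports of optional dependencies (peft, accelerate, bitsandbytes, llama-cpp-python) move from module level into the code paths that use them, and a missing package is turned into an actionable install hint instead of an import failure at startup; requirements.txt comments these extras out accordingly. A minimal sketch of the pattern, with `load_ggml_model` as an illustrative name rather than a function from this commit:

```python
def load_ggml_model(model_file):
    # Defer the optional import to the feature that needs it, so the
    # project still starts when the extra package is not installed.
    try:
        from llama_cpp import Llama
    except ImportError as exc:
        # Turn the bare ImportError into an actionable install hint.
        raise ValueError(
            "Could not import the llama-cpp-python package. "
            "Please install it with `pip install llama-cpp-python`."
        ) from exc
    return Llama(model_path=str(model_file))
```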
@@ -4,18 +4,11 @@ import os
 import re
 import time
 from pathlib import Path
-from peft import PeftModel
 from typing import Optional, List, Dict, Tuple, Union
 import torch
 import transformers
 from transformers import (AutoConfig, AutoModel, AutoModelForCausalLM,
-                          AutoTokenizer, BitsAndBytesConfig, LlamaTokenizer)
-from transformers.dynamic_module_utils import get_class_from_dynamic_module
-from transformers.modeling_utils import no_init_weights
-from transformers.utils import ContextManagers
-from accelerate import init_empty_weights
-from accelerate.utils import get_balanced_memory, infer_auto_device_map
+                          AutoTokenizer, LlamaTokenizer)
 from configs.model_config import LLM_DEVICE
@@ -104,7 +97,7 @@ class LoaderCheckPoint:
             LoaderClass = AutoModelForCausalLM

         # Load the model in simple 16-bit mode by default
-        if not any([self.llm_device.lower()=="cpu",
+        if not any([self.llm_device.lower() == "cpu",
                     self.load_in_8bit, self.is_llamacpp]):
             if torch.cuda.is_available() and self.llm_device.lower().startswith("cuda"):
@@ -150,7 +143,15 @@ class LoaderCheckPoint:
                 )
         elif self.is_llamacpp:
-            from models.extensions.llamacpp_model_alternative import LlamaCppModel
+            try:
+                from models.extensions.llamacpp_model_alternative import LlamaCppModel
+            except ImportError as exc:
+                raise ValueError(
+                    "Could not import the llama-cpp-python package. "
+                    "Please install it with `pip install llama-cpp-python`."
+                ) from exc

             model_file = list(checkpoint.glob('ggml*.bin'))[0]
             print(f"llama.cpp weights detected: {model_file}\n")
@@ -158,8 +159,19 @@ class LoaderCheckPoint:
             model, tokenizer = LlamaCppModel.from_pretrained(model_file)
             return model, tokenizer
-        # Custom
-        else:
+        elif self.load_in_8bit:
+            try:
+                from accelerate import init_empty_weights
+                from accelerate.utils import get_balanced_memory, infer_auto_device_map
+                from transformers import BitsAndBytesConfig
+            except ImportError as exc:
+                raise ValueError(
+                    "Could not import the required python packages. "
+                    "Please install them with `pip install transformers` "
+                    "`pip install bitsandbytes` `pip install accelerate`."
+                ) from exc
+
             params = {"low_cpu_mem_usage": True}

             if not self.llm_device.lower().startswith("cuda"):
@@ -167,30 +179,30 @@ class LoaderCheckPoint:
             else:
                 params["device_map"] = 'auto'
                 params["trust_remote_code"] = True
-            if self.load_in_8bit:
-                params['quantization_config'] = BitsAndBytesConfig(load_in_8bit=True,
-                                                                   llm_int8_enable_fp32_cpu_offload=False)
-            elif self.bf16:
-                params["torch_dtype"] = torch.bfloat16
-            else:
-                params["torch_dtype"] = torch.float16
-            if self.load_in_8bit and params.get('max_memory', None) is not None and params['device_map'] == 'auto':
-                config = AutoConfig.from_pretrained(checkpoint)
-                with init_empty_weights():
-                    model = LoaderClass.from_config(config)
-                model.tie_weights()
-                if self.device_map is not None:
-                    params['device_map'] = self.device_map
-                else:
-                    params['device_map'] = infer_auto_device_map(
-                        model,
-                        dtype=torch.int8,
-                        max_memory=params['max_memory'],
-                        no_split_module_classes=model._no_split_modules
-                    )
+            params['quantization_config'] = BitsAndBytesConfig(load_in_8bit=True,
+                                                               llm_int8_enable_fp32_cpu_offload=False)
+            with init_empty_weights():
+                model = LoaderClass.from_config(self.model_config, trust_remote_code=True)
+            model.tie_weights()
+            if self.device_map is not None:
+                params['device_map'] = self.device_map
+            else:
+                params['device_map'] = infer_auto_device_map(
+                    model,
+                    dtype=torch.int8,
+                    no_split_module_classes=model._no_split_modules
+                )
             try:
-                model = LoaderClass.from_pretrained(checkpoint, **params)
+                model = LoaderClass.from_pretrained(checkpoint, **params)
             except ImportError as exc:
                 raise ValueError(
                     "If 8-bit quantized loading is enabled and the project fails to start, "
                     "see https://github.com/TimDettmers/bitsandbytes/issues/156 and pick a "
                     "CUDA version that matches your environment."
                 ) from exc
+        # Custom
+        else:
+            pass

         # Loading the tokenizer
         if type(model) is transformers.LlamaForCausalLM:
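The 8-bit branch above infers a device map from an empty-weight skeleton of the model before any real weights are loaded, so accelerate can plan the GPU/CPU split against int8 tensor sizes. A standalone sketch of that technique, assuming transformers, accelerate, and bitsandbytes are installed; the checkpoint name is an illustrative placeholder:

```python
import torch
from accelerate import init_empty_weights
from accelerate.utils import infer_auto_device_map
from transformers import AutoConfig, AutoModelForCausalLM, BitsAndBytesConfig

checkpoint = "some-org/some-causal-lm"  # placeholder, not from the commit

# Build the module tree on the meta device: no memory is allocated,
# but the structure is enough for accelerate to plan a placement.
config = AutoConfig.from_pretrained(checkpoint, trust_remote_code=True)
with init_empty_weights():
    model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
model.tie_weights()

device_map = infer_auto_device_map(
    model,
    dtype=torch.int8,  # plan memory against the quantized weight size
    no_split_module_classes=model._no_split_modules,
)

# Load the real weights once, directly into the planned placement.
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    device_map=device_map,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    trust_remote_code=True,
)
```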
@@ -247,6 +259,20 @@ class LoaderCheckPoint:
         return device_map

     def moss_auto_configure_device_map(self, num_gpus: int, model_name) -> Dict[str, int]:
+        try:
+            from accelerate import init_empty_weights
+            from accelerate.utils import get_balanced_memory, infer_auto_device_map
+            from transformers.dynamic_module_utils import get_class_from_dynamic_module
+            from transformers.modeling_utils import no_init_weights
+            from transformers.utils import ContextManagers
+        except ImportError as exc:
+            raise ValueError(
+                "Could not import the required python packages. "
+                "Please install them with `pip install transformers` "
+                "`pip install bitsandbytes` `pip install accelerate`."
+            ) from exc
+
         checkpoint = Path(f'{self.model_dir}/{model_name}')

         if self.model_path:
@@ -271,6 +297,16 @@ class LoaderCheckPoint:
         return device_map

     def _add_lora_to_model(self, lora_names):
+        try:
+            from peft import PeftModel
+        except ImportError as exc:
+            raise ValueError(
+                "Could not import the peft python package. "
+                "Please install it with `pip install peft` `pip install accelerate`."
+            ) from exc
+
         # LoRAs currently loaded
         prior_set = set(self.lora_names)
         # LoRAs still to be loaded
......
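`_add_lora_to_model` guards the peft import the same way; once it succeeds, attaching an adapter to an already-loaded base model is a single call. A minimal sketch, with both paths as illustrative placeholders:

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("path/to/base-model")  # placeholder
# Wrap the base model with the adapter weights from a LoRA checkpoint.
model = PeftModel.from_pretrained(base, "loras/example-lora")  # placeholder
```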
@@ -11,16 +11,16 @@ beautifulsoup4
 icetk
 cpm_kernels
 faiss-cpu
-accelerate~=0.18.0
 gradio==3.28.3
 fastapi~=0.95.0
 uvicorn~=0.21.1
-peft~=0.3.0
 pypinyin~=0.48.0
 click~=8.1.3
 tabulate
 azure-core
-bitsandbytes; platform_system != "Windows"
+#accelerate~=0.18.0
+#peft~=0.3.0
+#bitsandbytes; platform_system != "Windows"
+#llama-cpp-python==0.1.34; platform_system != "Windows"
+#https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.34/llama_cpp_python-0.1.34-cp310-cp310-win_amd64.whl; platform_system == "Windows"
......