Commit 99440167  Author: glide-the

Extension features: check optional dependencies and prompt for installation

Parent 1c5f71be
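The change swaps module-level imports of optional packages (peft, bitsandbytes, accelerate, llama-cpp-python) for deferred imports that turn a missing package into an actionable install hint. A minimal sketch of the pattern, assuming the real `llama_cpp` bindings (the wrapper function name is hypothetical):

import pathlib

def load_llamacpp_weights(model_file: pathlib.Path):
    # The optional dependency is imported inside the code path that needs it,
    # so users who never load llama.cpp models never need the package.
    try:
        from llama_cpp import Llama
    except ImportError as exc:
        raise ValueError(
            "Could not import the required python package. "
            "Please install it with `pip install llama-cpp-python`."
        ) from exc
    return Llama(model_path=str(model_file))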
@@ -4,18 +4,11 @@ import os
 import re
 import time
 from pathlib import Path
-from peft import PeftModel
 from typing import Optional, List, Dict, Tuple, Union
 import torch
 import transformers
 from transformers import (AutoConfig, AutoModel, AutoModelForCausalLM,
-                          AutoTokenizer, BitsAndBytesConfig, LlamaTokenizer)
-from transformers.dynamic_module_utils import get_class_from_dynamic_module
-from transformers.modeling_utils import no_init_weights
-from transformers.utils import ContextManagers
-from accelerate import init_empty_weights
-from accelerate.utils import get_balanced_memory, infer_auto_device_map
+                          AutoTokenizer, LlamaTokenizer)
 from configs.model_config import LLM_DEVICE
@@ -104,7 +97,7 @@ class LoaderCheckPoint:
             LoaderClass = AutoModelForCausalLM

         # Load the model in simple 16-bit mode by default
-        if not any([self.llm_device.lower()=="cpu",
+        if not any([self.llm_device.lower() == "cpu",
                     self.load_in_8bit, self.is_llamacpp]):
             if torch.cuda.is_available() and self.llm_device.lower().startswith("cuda"):
@@ -150,16 +143,35 @@ class LoaderCheckPoint:
             )
         elif self.is_llamacpp:
-            from models.extensions.llamacpp_model_alternative import LlamaCppModel
+            try:
+                from models.extensions.llamacpp_model_alternative import LlamaCppModel
+            except ImportError as exc:
+                raise ValueError(
+                    "Could not import the required python package. "
+                    "Please install it with `pip install llama-cpp-python`."
+                ) from exc

             model_file = list(checkpoint.glob('ggml*.bin'))[0]
             print(f"llama.cpp weights detected: {model_file}\n")

             model, tokenizer = LlamaCppModel.from_pretrained(model_file)
             return model, tokenizer

-        # Custom
-        else:
+        elif self.load_in_8bit:
+            try:
+                from accelerate import init_empty_weights
+                from accelerate.utils import get_balanced_memory, infer_auto_device_map
+                from transformers import BitsAndBytesConfig
+            except ImportError as exc:
+                raise ValueError(
+                    "Could not import the required python packages. "
+                    "Please install them with `pip install transformers bitsandbytes accelerate`."
+                ) from exc
+
             params = {"low_cpu_mem_usage": True}
             if not self.llm_device.lower().startswith("cuda"):
@@ -167,18 +179,11 @@ class LoaderCheckPoint:
             else:
                 params["device_map"] = 'auto'
             params["trust_remote_code"] = True
-            if self.load_in_8bit:
-                params['quantization_config'] = BitsAndBytesConfig(load_in_8bit=True,
-                                                                   llm_int8_enable_fp32_cpu_offload=False)
-            elif self.bf16:
-                params["torch_dtype"] = torch.bfloat16
-            else:
-                params["torch_dtype"] = torch.float16
-
-            if self.load_in_8bit and params.get('max_memory', None) is not None and params['device_map'] == 'auto':
-                config = AutoConfig.from_pretrained(checkpoint)
-                with init_empty_weights():
-                    model = LoaderClass.from_config(config)
+            params['quantization_config'] = BitsAndBytesConfig(load_in_8bit=True,
+                                                               llm_int8_enable_fp32_cpu_offload=False)
+
+            with init_empty_weights():
+                model = LoaderClass.from_config(self.model_config, trust_remote_code=True)
             model.tie_weights()

             if self.device_map is not None:
                 params['device_map'] = self.device_map
@@ -186,11 +191,18 @@ class LoaderCheckPoint:
                 params['device_map'] = infer_auto_device_map(
                     model,
                     dtype=torch.int8,
-                    max_memory=params['max_memory'],
                     no_split_module_classes=model._no_split_modules
                 )
-            model = LoaderClass.from_pretrained(checkpoint, **params)
+            try:
+                model = LoaderClass.from_pretrained(checkpoint, **params)
+            except ImportError as exc:
+                raise ValueError(
+                    "The project cannot start with 8-bit quantized loading enabled; "
+                    "see https://github.com/TimDettmers/bitsandbytes/issues/156 "
+                    "and pick the CUDA build that matches your environment."
+                ) from exc
+        # Custom
+        else:
+            pass

         # Loading the tokenizer
         if type(model) is transformers.LlamaForCausalLM:
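The device-map logic above first builds the model on the meta device so accelerate can size layers without allocating real weights; roughly, under the same assumptions (placeholder checkpoint name):

import torch
from accelerate import init_empty_weights
from accelerate.utils import infer_auto_device_map
from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained("your/model-name")  # placeholder
with init_empty_weights():
    # Instantiated with empty (meta) weights: shapes only, no memory cost.
    model = AutoModelForCausalLM.from_config(config)
model.tie_weights()  # shared embeddings are counted once when sizing layers

device_map = infer_auto_device_map(
    model,
    dtype=torch.int8,  # size layers as int8 to match 8-bit loading
    no_split_module_classes=model._no_split_modules,
)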
@@ -247,6 +259,20 @@ class LoaderCheckPoint:
         return device_map

     def moss_auto_configure_device_map(self, num_gpus: int, model_name) -> Dict[str, int]:
+        try:
+            from accelerate import init_empty_weights
+            from accelerate.utils import get_balanced_memory, infer_auto_device_map
+            from transformers.dynamic_module_utils import get_class_from_dynamic_module
+            from transformers.modeling_utils import no_init_weights
+            from transformers.utils import ContextManagers
+        except ImportError as exc:
+            raise ValueError(
+                "Could not import the required python packages. "
+                "Please install them with `pip install transformers bitsandbytes accelerate`."
+            ) from exc
+
         checkpoint = Path(f'{self.model_dir}/{model_name}')
         if self.model_path:
@@ -271,6 +297,16 @@ class LoaderCheckPoint:
         return device_map

     def _add_lora_to_model(self, lora_names):
+        try:
+            from peft import PeftModel
+        except ImportError as exc:
+            raise ValueError(
+                "Could not import the required python package. "
+                "Please install it with `pip install peft accelerate`."
+            ) from exc
+
         # LoRAs currently loaded
         prior_set = set(self.lora_names)
         # LoRAs that still need to be loaded
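`_add_lora_to_model` now defers the peft import the same way; attaching a LoRA with peft looks roughly like this (model and LoRA paths are placeholders):

from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("your/base-model")  # placeholder
model = PeftModel.from_pretrained(base, "loras/your-lora")      # placeholder LoRA dir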
......
@@ -11,16 +11,16 @@ beautifulsoup4
 icetk
 cpm_kernels
 faiss-cpu
-accelerate~=0.18.0
 gradio==3.28.3
 fastapi~=0.95.0
 uvicorn~=0.21.1
-peft~=0.3.0
 pypinyin~=0.48.0
 click~=8.1.3
 tabulate
 azure-core
-bitsandbytes; platform_system != "Windows"
+#accelerate~=0.18.0
+#peft~=0.3.0
+#bitsandbytes; platform_system != "Windows"
 #llama-cpp-python==0.1.34; platform_system != "Windows"
 #https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.34/llama_cpp_python-0.1.34-cp310-cp310-win_amd64.whl; platform_system == "Windows"
......
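With these packages commented out, each is only needed when the matching extension is used; a small probe (a sketch, not part of the commit) reports which optional dependencies are present:

import importlib.util

# Install hints mirror the messages raised by the loader.
OPTIONAL_DEPS = {
    "peft": "pip install peft",
    "bitsandbytes": "pip install bitsandbytes",
    "accelerate": "pip install accelerate",
    "llama_cpp": "pip install llama-cpp-python",
}

for module, hint in OPTIONAL_DEPS.items():
    if importlib.util.find_spec(module) is None:
        print(f"{module}: missing, install with `{hint}`")
    else:
        print(f"{module}: available")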