提交 52c6f2d2 作者: imClumsyPanda

fix int4 and int8 model init bug

上级 eeea00e2
...@@ -65,8 +65,18 @@ llm_model_dict = { ...@@ -65,8 +65,18 @@ llm_model_dict = {
} }
} }
# LLM model name # LLM 名称
LLM_MODEL = "chatglm-6b" LLM_MODEL = "chatglm-6b"
# 如果你需要加载本地的model,指定这个参数 ` --no-remote-model`,或者下方参数修改为 `True`
NO_REMOTE_MODEL = False
# 量化加载8bit 模型
LOAD_IN_8BIT = False
# Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.
BF16 = False
# 本地模型存放的位置
MODEL_DIR = "model/"
# 本地lora存放的位置
LORA_DIR = "loras/"
# LLM lora path,默认为空,如果有请直接指定文件夹路径 # LLM lora path,默认为空,如果有请直接指定文件夹路径
LLM_LORA_PATH = "" LLM_LORA_PATH = ""
......
import argparse import argparse
import os import os
from configs.model_config import *
# Additional argparse types # Additional argparse types
...@@ -32,28 +32,25 @@ def dir_path(string): ...@@ -32,28 +32,25 @@ def dir_path(string):
parser = argparse.ArgumentParser(prog='langchina-ChatGLM', parser = argparse.ArgumentParser(prog='langchina-ChatGLM',
description='基于langchain和chatGML的LLM文档阅读器') description='About langchain-ChatGLM, local knowledge based ChatGLM with langchain | '
'基于本地知识库的 ChatGLM 问答')
parser.add_argument('--no-remote-model', action='store_true', default=NO_REMOTE_MODEL, help='remote in the model on '
parser.add_argument('--no-remote-model', action='store_true', default=False, help='remote in the model on loader checkpoint, if your load local model to add the ` --no-remote-model`') 'loader checkpoint, '
parser.add_argument('--model', type=str, default='chatglm-6b', help='Name of the model to load by default.') 'if your load local '
'model to add the ` '
'--no-remote-model`')
parser.add_argument('--model', type=str, default=LLM_MODEL, help='Name of the model to load by default.')
parser.add_argument('--lora', type=str, help='Name of the LoRA to apply to the model by default.') parser.add_argument('--lora', type=str, help='Name of the LoRA to apply to the model by default.')
parser.add_argument("--model-dir", type=str, default='model/', help="Path to directory with all the models") parser.add_argument("--model-dir", type=str, default=MODEL_DIR, help="Path to directory with all the models")
parser.add_argument("--lora-dir", type=str, default='loras/', help="Path to directory with all the loras") parser.add_argument("--lora-dir", type=str, default=LORA_DIR, help="Path to directory with all the loras")
# Accelerate/transformers # Accelerate/transformers
parser.add_argument('--cpu', action='store_true', help='Use the CPU to generate text. Warning: Training on CPU is extremely slow.') parser.add_argument('--load-in-8bit', action='store_true', default=LOAD_IN_8BIT,
parser.add_argument('--auto-devices', action='store_true', help='Automatically split the model across the available GPU(s) and CPU.') help='Load the model with 8-bit precision.')
parser.add_argument('--gpu-memory', type=str, nargs="+", help='Maxmimum GPU memory in GiB to be allocated per GPU. Example: --gpu-memory 10 for a single GPU, --gpu-memory 10 5 for two GPUs. You can also set values in MiB like --gpu-memory 3500MiB.') parser.add_argument('--bf16', action='store_true', default=BF16,
parser.add_argument('--cpu-memory', type=str, help='Maximum CPU memory in GiB to allocate for offloaded weights. Same as above.') help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')
parser.add_argument('--load-in-8bit', action='store_true', help='Load the model with 8-bit precision.')
parser.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')
args = parser.parse_args([]) args = parser.parse_args([])
# Generates dict with a default value for each argument # Generates dict with a default value for each argument
DEFAULT_ARGS = vars(args) DEFAULT_ARGS = vars(args)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论