fix int4 and int8 model init bug

52c6f2d2 · imClumsyPanda · eeea00e2 · 52c6f2d2 · 52c6f2d2
--- a/configs/model_config.py
+++ b/configs/model_config.py
@@ -65,8 +65,18 @@ llm_model_dict = {
    }
 }
-# LLM model name
+# LLM 名称
 LLM_MODEL = "chatglm-6b"
+# 如果需要加载本地模型，请指定命令行参数 `--no-remote-model`，或者将下方参数修改为 `True`
+NO_REMOTE_MODEL = False
+# 是否以 8bit 量化方式加载模型
+LOAD_IN_8BIT = False
+# Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.
+BF16 = False
+# 本地模型存放的位置
+MODEL_DIR = "model/"
+# 本地lora存放的位置
+LORA_DIR = "loras/"
 # LLM lora path，默认为空，如果有请直接指定文件夹路径
 LLM_LORA_PATH = ""

--- a/models/loader/args.py
+++ b/models/loader/args.py
 import argparse
 import os
+from configs.model_config import *
 # Additional argparse types
@@ -32,28 +32,25 @@ def dir_path(string):
 parser = argparse.ArgumentParser(prog='langchina-ChatGLM',
-                                 description='基于langchain和chatGML的LLM文档阅读器')
+                                 description='About langchain-ChatGLM, local knowledge based ChatGLM with langchain ｜ '
+                                             '基于本地知识库的 ChatGLM 问答')
+parser.add_argument('--no-remote-model', action='store_true', default=NO_REMOTE_MODEL, help='remote in the model on '
-parser.add_argument('--no-remote-model',  action='store_true', default=False,  help='remote in the model on loader checkpoint, if your load local model to add the ` --no-remote-model`')
+                                                                                            'loader checkpoint, '
-parser.add_argument('--model', type=str, default='chatglm-6b', help='Name of the model to load by default.')
+                                                                                            'if your load local '
+                                                                                            'model to add the ` '
+                                                                                            '--no-remote-model`')
+parser.add_argument('--model', type=str, default=LLM_MODEL, help='Name of the model to load by default.')
 parser.add_argument('--lora', type=str, help='Name of the LoRA to apply to the model by default.')
-parser.add_argument("--model-dir", type=str, default='model/', help="Path to directory with all the models")
+parser.add_argument("--model-dir", type=str, default=MODEL_DIR, help="Path to directory with all the models")
-parser.add_argument("--lora-dir", type=str, default='loras/', help="Path to directory with all the loras")
+parser.add_argument("--lora-dir", type=str, default=LORA_DIR, help="Path to directory with all the loras")
 # Accelerate/transformers
-parser.add_argument('--cpu', action='store_true', help='Use the CPU to generate text. Warning: Training on CPU is extremely slow.')
+parser.add_argument('--load-in-8bit', action='store_true', default=LOAD_IN_8BIT,
-parser.add_argument('--auto-devices', action='store_true', help='Automatically split the model across the available GPU(s) and CPU.')
+                    help='Load the model with 8-bit precision.')
-parser.add_argument('--gpu-memory', type=str, nargs="+", help='Maxmimum GPU memory in GiB to be allocated per GPU. Example: --gpu-memory 10 for a single GPU, --gpu-memory 10 5 for two GPUs. You can also set values in MiB like --gpu-memory 3500MiB.')
+parser.add_argument('--bf16', action='store_true', default=BF16,
-parser.add_argument('--cpu-memory', type=str, help='Maximum CPU memory in GiB to allocate for offloaded weights. Same as above.')
+                    help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')
-parser.add_argument('--load-in-8bit', action='store_true', help='Load the model with 8-bit precision.')
-parser.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')
 args = parser.parse_args([])
 # Generares dict with a default value for each argument
 DEFAULT_ARGS = vars(args)