Unverified commit 7d37dc87 — Author: Zhi-guo Huang, Committer: GitHub

Merge pull request #904 from bones-zhu/dev

1. Fix a typo in model_config.py; 2. Change the default LLM_MODEL; 3. Remove unused modules from chatglm_llm.py
model_config.py:

```diff
@@ -203,7 +203,7 @@ llm_model_dict = {
 }
 # LLM model name
-LLM_MODEL = "fastchat-chatglm-6b-int4"
+LLM_MODEL = "fastchat-chatglm"
 # load the model quantized to 8-bit
 LOAD_IN_8BIT = False
 # Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.
@@ -220,7 +220,7 @@ STREAMING = True
 # Use p-tuning-v2 PrefixEncoder
 USE_PTUNING_V2 = False
-PTUNING_DIR='./ptuing-v2'
+PTUNING_DIR='./ptuning-v2'
 # LLM running device
 LLM_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
```
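The rename only takes effect if "fastchat-chatglm" is actually a key in llm_model_dict. A minimal sketch of how such a config is typically consumed, assuming the dict maps model names to settings; the resolve_model_config helper is hypothetical, not this repo's loader, and the real dict entries are elided in the diff:

```python
# Hypothetical consumer of the config values above; a sketch, not the repo's actual code.
import torch

llm_model_dict = {
    "fastchat-chatglm": {},  # real entries elided in the diff; assumed name -> settings dict
}
LLM_MODEL = "fastchat-chatglm"
# Same device fallback chain as in model_config.py: CUDA, then Apple MPS, then CPU.
LLM_DEVICE = ("cuda" if torch.cuda.is_available()
              else "mps" if torch.backends.mps.is_available()
              else "cpu")

def resolve_model_config(name: str = LLM_MODEL) -> dict:
    """Look up the selected model's settings, failing loudly on a bad name."""
    try:
        return llm_model_dict[name]
    except KeyError:
        raise ValueError(
            f"Unknown LLM_MODEL {name!r}; known models: {sorted(llm_model_dict)}"
        ) from None

print(resolve_model_config(), LLM_DEVICE)
```

Failing with an explicit list of known model names makes mismatches between LLM_MODEL and llm_model_dict (such as the old "fastchat-chatglm-6b-int4" default) surface immediately instead of as a bare KeyError.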
chatglm_llm.py:

```diff
@@ -2,14 +2,14 @@ from abc import ABC
 from langchain.chains.base import Chain
 from typing import Any, Dict, List, Optional, Generator
 from langchain.callbacks.manager import CallbackManagerForChainRun
-from transformers.generation.logits_process import LogitsProcessor
-from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList
+# from transformers.generation.logits_process import LogitsProcessor
+# from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList
 from models.loader import LoaderCheckPoint
 from models.base import (BaseAnswer,
                          AnswerResult,
                          AnswerResultStream,
                          AnswerResultQueueSentinelTokenListenerQueue)
-import torch
+# import torch
 import transformers
```
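With the torch and transformers.generation imports commented out, a quick re-import confirms nothing left in the module references the removed names at import time. A minimal smoke test, assuming the repository root is on sys.path and the module lives at models/chatglm_llm.py:

```python
# Hypothetical smoke test; assumes the repo root is on sys.path.
import importlib

mod = importlib.import_module("models.chatglm_llm")
print(f"{mod.__name__} imported OK")
```

Commenting the imports out rather than deleting them keeps the change easy to revert; a linter such as pyflakes will flag any remaining unused imports if they are later removed outright.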