Merge branch 'dev'

5d88f715 · imClumsyPanda · 9c6e6652 · d82af9ba · 5d88f715 · 5d88f715
--- a/Dockerfile
+++ b/Dockerfile
+# Image that runs the project's web UI entry point (webui.py) on Python 3.8.
+FROM python:3.8
+
+# MAINTAINER is deprecated; LABEL carries the same metadata.
+LABEL maintainer="chatGLM"
+
+WORKDIR /chatGLM
+
+# Install dependencies before copying the sources so that source-only changes
+# reuse the cached dependency layers instead of reinstalling torch and friends.
+COPY requirements.txt /chatGLM/
+
+RUN pip install --user torch torchvision tensorboard cython -i https://pypi.tuna.tsinghua.edu.cn/simple
+# RUN pip install --user 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
+
+# RUN pip install --user 'git+https://github.com/facebookresearch/fvcore'
+# install detectron2
+# RUN git clone https://github.com/facebookresearch/detectron2
+
+RUN pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/ --trusted-host pypi.tuna.tsinghua.edu.cn
+
+# Application sources and bundled data.
+COPY agent /chatGLM/agent
+
+COPY chains /chatGLM/chains
+
+COPY configs /chatGLM/configs
+
+COPY content /chatGLM/content
+
+COPY models /chatGLM/models
+
+# NOTE(review): this merges the contents of nltk_data into /chatGLM/content;
+# /chatGLM/nltk_data may have been intended -- confirm where the app looks for NLTK data.
+COPY nltk_data /chatGLM/content
+
+COPY cli_demo.py /chatGLM/
+
+COPY webui.py /chatGLM/
+
+# -u keeps Python output unbuffered so logs show up in `docker logs` immediately.
+CMD ["python","-u", "webui.py"]
--- a/README.md
+++ b/README.md
@@ -4,11 +4,11 @@

 🌍 [_READ THIS IN ENGLISH_](README_en.md)

-🤖️ 一种利用 [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) + [langchain](https://github.com/hwchase17/langchain) 实现的基于本地知识的 ChatGLM 应用。
+🤖️ 一种利用 [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) + [langchain](https://github.com/hwchase17/langchain) 实现的基于本地知识的 ChatGLM 应用。增加 [clue-ai/ChatYuan](https://github.com/clue-ai/ChatYuan) 项目的模型 [ClueAI/ChatYuan-large-v2](https://huggingface.co/ClueAI/ChatYuan-large-v2) 的支持。

 💡 受 [GanymedeNil](https://github.com/GanymedeNil) 的项目 [document.ai](https://github.com/GanymedeNil/document.ai) 和 [AlexZhangji](https://github.com/AlexZhangji) 创建的 [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216) 启发，建立了全部基于开源模型实现的本地知识问答应用。

-✅ 本项目中 Embedding 选用的是 [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main)，LLM 选用的是 [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B)。依托上述模型，本项目可实现全部使用**开源**模型**离线私有部署**。
+✅ 本项目中 Embedding 默认选用的是 [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main)，LLM 默认选用的是 [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B)。依托上述模型，本项目可实现全部使用**开源**模型**离线私有部署**。

 ⛓️ 本项目实现原理如下图所示，过程包括加载文件 -> 读取文本 -> 文本分割 -> 文本向量化 -> 问句向量化 -> 在文本向量中匹配出与问句向量最相似的`top k`个 -> 匹配出的文本作为上下文和问题一起添加到`prompt`中 -> 提交给`LLM`生成回答。

@@ -22,9 +22,7 @@

 参见 [变更日志](docs/CHANGELOG.md)。

-## 使用方式
-
-### 硬件需求
+## 硬件需求

 - ChatGLM-6B 模型硬件需求
  
@@ -38,9 +36,19 @@

    本项目中默认选用的 Embedding 模型 [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main) 约占用显存 3GB，也可修改为在 CPU 中运行。

+## Docker 部署
+
+```commandline
+$ docker build -t chatglm:v1.0 .
+
+$ docker run -d --restart=always --name chatglm -p 7860:7860 -v /www/wwwroot/code/langchain-ChatGLM:/chatGLM chatglm:v1.0
+```
+
+## 开发部署
+
 ### 软件需求

-本项目已在 Python 3.8，CUDA 11.7 环境下完成测试。
+本项目已在 Python 3.8 - 3.10，CUDA 11.7 环境下完成测试。已在 Windows、ARM 架构的 macOS、Linux 系统中完成测试。

 ### 从本地加载模型

@@ -123,6 +131,7 @@ Web UI 可以实现如下功能：
  - [x] THUDM/chatglm-6b
  - [x] THUDM/chatglm-6b-int4
  - [x] THUDM/chatglm-6b-int4-qe
+  - [x] ClueAI/ChatYuan-large-v2
 - [ ] Web UI
  - [x] 利用 gradio 实现 Web UI DEMO
  - [x] 添加输出内容及错误提示

--- a/api.py
+++ b/api.py
@@ -42,7 +42,7 @@ async def get_local_doc_qa():
    

 @app.post("/file")
-async def upload_file(UserFile: UploadFile=File(...)):
+async def upload_file(UserFile: UploadFile=File(...),):
    global vs_path
    response = {
        "msg": None,
@@ -67,7 +67,7 @@ async def upload_file(UserFile: UploadFile=File(...)):
    return response 

 @app.post("/qa")
-async def get_answer(UserQuery: Query):
+async def get_answer(query: str = ""):
    response = {
        "status": 0,
        "message": "",
@@ -76,7 +76,7 @@ async def get_answer(UserQuery: Query):
    global vs_path
    history = []
    try:
-        resp, history = local_doc_qa.get_knowledge_based_answer(query=UserQuery.query,
+        resp, history = local_doc_qa.get_knowledge_based_answer(query=query,
                                                                vs_path=vs_path,
                                                                chat_history=history)
        if REPLY_WITH_SOURCE:
@@ -95,9 +95,9 @@ async def get_answer(UserQuery: Query):

 if __name__ == "__main__":
    uvicorn.run(
-        app='api:app', 
+        app='api:app',  # uvicorn needs an import string (not the app object) when reload=True
        host='0.0.0.0', 
        port=8100,
-        reload = True,
+        reload=True,
        )

--- a/chains/local_doc_qa.py
+++ b/chains/local_doc_qa.py
@@ -33,6 +33,7 @@ def load_file(filepath):
 class LocalDocQA:
    llm: object = None
    embeddings: object = None
+    top_k: int = VECTOR_SEARCH_TOP_K

    def init_cfg(self,
                 embedding_model: str = EMBEDDING_MODEL,
@@ -49,9 +50,10 @@ class LocalDocQA:
                            use_ptuning_v2=use_ptuning_v2)
        self.llm.history_len = llm_history_len

-        self.embeddings = HuggingFaceEmbeddings(model_name=embedding_model_dict[embedding_model], )
-        self.embeddings.client = sentence_transformers.SentenceTransformer(self.embeddings.model_name,
-                                                                           device=embedding_device)
+        self.embeddings = HuggingFaceEmbeddings(model_name=embedding_model_dict[embedding_model],
+                                                model_kwargs={'device': embedding_device})
+        # self.embeddings.client = sentence_transformers.SentenceTransformer(self.embeddings.model_name,
+        #                                                                    device=embedding_device)
        self.top_k = top_k

    def init_knowledge_vector_store(self,
@@ -133,7 +135,7 @@ class LocalDocQA:
        )

        knowledge_chain.return_source_documents = True
-
+        
        result = knowledge_chain({"query": query})
        self.llm.history[-1][0] = query
        return result, self.llm.history
--- a/configs/model_config.py
+++ b/configs/model_config.py
@@ -19,6 +19,7 @@ llm_model_dict = {
    "chatglm-6b-int4-qe": "THUDM/chatglm-6b-int4-qe",
    "chatglm-6b-int4": "THUDM/chatglm-6b-int4",
    "chatglm-6b": "THUDM/chatglm-6b",
+    "chatyuan": "ClueAI/ChatYuan-large-v2",
 }

 # LLM model name

--- a/docs/FAQ.md
+++ b/docs/FAQ.md
@@ -95,7 +95,7 @@ Q9: 下载完模型后，如何修改代码以执行本地模型？

 A9: 模型下载完成后，请在 [configs/model_config.py](../configs/model_config.py) 文件中，对`embedding_model_dict`和`llm_model_dict`参数进行修改，如把`llm_model_dict`从

-```json
+```python
 embedding_model_dict = {
    "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
    "ernie-base": "nghuyong/ernie-3.0-base-zh",
@@ -105,7 +105,7 @@ embedding_model_dict = {

 修改为

-```json
+```python
 embedding_model_dict = {
                        "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
                        "ernie-base": "nghuyong/ernie-3.0-base-zh",

--- a/models/chatglm_llm.py
+++ b/models/chatglm_llm.py
@@ -72,14 +72,27 @@ class ChatGLM(LLM):
        response, _ = self.model.chat(
            self.tokenizer,
            prompt,
-            history=self.history[-self.history_len:] if self.history_len>0 else [],
+            history=self.history[-self.history_len:] if self.history_len > 0 else [],
            max_length=self.max_token,
            temperature=self.temperature,
        )
        torch_gc()
        if stop is not None:
            response = enforce_stop_tokens(response, stop)
-        self.history = self.history+[[None, response]]
+        self.history = self.history + [[None, response]]
+        return response
+
+    def chat(self,
+             prompt: str) -> str:  # run one model.chat round for `prompt` and return the reply text
+        response, _ = self.model.chat(
+            self.tokenizer,
+            prompt,
+            history=self.history[-self.history_len:] if self.history_len > 0 else [],  # last history_len entries, or no history at all
+            max_length=self.max_token,
+            temperature=self.temperature,
+        )
+        torch_gc()
+        self.history = self.history + [[None, response]]  # first slot left as None; builds a new list instead of appending in place
+        return response

    def load_model(self,
@@ -113,7 +126,7 @@ class ChatGLM(LLM):
                    AutoModel.from_pretrained(
                        model_name_or_path,
                        config=model_config,
-                        trust_remote_code=True, 
+                        trust_remote_code=True,
                        **kwargs)
                    .half()
                    .cuda()
@@ -146,7 +159,8 @@ class ChatGLM(LLM):
                        new_prefix_state_dict[k[len("transformer.prefix_encoder."):]] = v
                self.model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)
                self.model.transformer.prefix_encoder.float()
-            except Exception:
+            except Exception as e:
+                print(e)
                print("加载PrefixEncoder模型参数失败")

        self.model = self.model.eval()
--- a/requirements.txt
+++ b/requirements.txt
-langchain>=0.0.124
+langchain>=0.0.146
 transformers==4.27.1
 unstructured[local-inference]
 layoutparser[layoutmodels,tesseract]
@@ -9,4 +9,4 @@ icetk
 cpm_kernels
 faiss-cpu
 gradio>=3.25.0
-detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2
\ No newline at end of file
+#detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2
\ No newline at end of file
--- a/webui.py
+++ b/webui.py
@@ -17,10 +17,10 @@ LLM_HISTORY_LEN = 3
 def get_vs_list():
    if not os.path.exists(VS_ROOT_PATH):
        return []
-    return ["新建知识库"] + os.listdir(VS_ROOT_PATH)
+    return os.listdir(VS_ROOT_PATH)  # directory entries only; the "new knowledge base" option is prepended where vs_list is built


-vs_list = get_vs_list()
+vs_list = ["新建知识库"] + get_vs_list()

 embedding_model_dict_list = list(embedding_model_dict.keys())