Merge branch 'dev'

dc0cdfba · imClumsyPanda · 1fe50b04 · 0234a95f · dc0cdfba · dc0cdfba
--- a/.gitignore
+++ b/.gitignore
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+# Other files
+output/*
+log/*
+.chroma
+vector_store/*
\ No newline at end of file
--- a/README.md
+++ b/README.md
@@ -18,16 +18,14 @@
 ## 更新信息
-**[2023/04/07]** 
+**[2023/04/15]**
-1. 解决加载 ChatGLM 模型时发生显存占用为双倍的问题 (感谢 [@suc16](https://github.com/suc16) 和 [@myml](https://github.com/myml)) ；
+1. 重构项目结构，在根目录下保留命令行 Demo [cli_demo.py](cli_demo.py) 和 Web UI Demo [webui.py](webui.py)；
-2. 新增清理显存机制；
+2. 对 Web UI 进行改进，修改为运行 Web UI 后首先按照 [configs/model_config.py](configs/model_config.py) 默认选项加载模型，并增加报错提示信息等；
-3. 新增`nghuyong/ernie-3.0-nano-zh`和`nghuyong/ernie-3.0-base-zh`作为 Embedding 模型备选项，相比`GanymedeNil/text2vec-large-chinese`占用显存资源更少 (感谢 [@lastrei](https://github.com/lastrei))。
+3. 对常见问题进行补充说明。
-**[2023/04/09]**
+**[2023/04/12]**
-1. 使用`langchain`中的`RetrievalQA`替代之前选用的`ChatVectorDBChain`，替换后可以有效减少提问 2-3 次后因显存不足而停止运行的问题；
+1. 替换 Web UI 中的样例文件，避免 Ubuntu 中出现因文件编码无法读取的问题；
-2. 在`knowledge_based_chatglm.py`中增加`EMBEDDING_MODEL`、`VECTOR_SEARCH_TOP_K`、`LLM_MODEL`、`LLM_HISTORY_LEN`、`REPLY_WITH_SOURCE`参数值设置；
+2. 替换`knowledge_based_chatglm.py`中的 prompt 模版，避免出现因 prompt 模版包含中英双语导致 chatglm 返回内容错乱的问题。
-3. 增加 GPU 显存需求更小的`chatglm-6b-int4`、`chatglm-6b-int4-qe`作为 LLM 模型备选项；
-4. 更正`README.md`中的代码错误（感谢 [@calcitem](https://github.com/calcitem)）。
 **[2023/04/11]** 
 1. 加入 Web UI V0.1 版本（感谢 [@liangtongt](https://github.com/liangtongt)）；
@@ -35,15 +33,22 @@
 3. 增加 LLM 和 Embedding 模型运行设备是否可用`cuda`、`mps`、`cpu`的自动判断。
 4. 在`knowledge_based_chatglm.py`中增加对`filepath`的判断，在之前支持单个文件导入的基础上，现支持单个文件夹路径作为输入，输入后将会遍历文件夹中各个文件，并在命令行中显示每个文件是否成功加载。
-**[2023/04/12]**
+**[2023/04/09]**
-1. 替换 Web UI 中的样例文件，避免出现 Ubuntu 中出现因文件编码无法读取的问题；
+1. 使用`langchain`中的`RetrievalQA`替代之前选用的`ChatVectorDBChain`，替换后可以有效减少提问 2-3 次后因显存不足而停止运行的问题；
-2. 替换`knowledge_based_chatglm.py`中的 prompt 模版，避免出现因 prompt 模版包含中英双语导致 chatglm 返回内容错乱的问题。
+2. 在`knowledge_based_chatglm.py`中增加`EMBEDDING_MODEL`、`VECTOR_SEARCH_TOP_K`、`LLM_MODEL`、`LLM_HISTORY_LEN`、`REPLY_WITH_SOURCE`参数值设置；
+3. 增加 GPU 显存需求更小的`chatglm-6b-int4`、`chatglm-6b-int4-qe`作为 LLM 模型备选项；
+4. 更正`README.md`中的代码错误（感谢 [@calcitem](https://github.com/calcitem)）。
+**[2023/04/07]** 
+1. 解决加载 ChatGLM 模型时发生显存占用为双倍的问题 (感谢 [@suc16](https://github.com/suc16) 和 [@myml](https://github.com/myml)) ；
+2. 新增清理显存机制；
+3. 新增`nghuyong/ernie-3.0-nano-zh`和`nghuyong/ernie-3.0-base-zh`作为 Embedding 模型备选项，相比`GanymedeNil/text2vec-large-chinese`占用显存资源更少 (感谢 [@lastrei](https://github.com/lastrei))。
 ## 使用方式
 ### 硬件需求
 - ChatGLM-6B 模型硬件需求
    | **量化等级**   | **最低 GPU 显存**（推理） | **最低 GPU 显存**（高效参数微调） |
    | -------------- | ------------------------- | --------------------------------- |
    | FP16（无量化） | 13 GB                     | 14 GB                             |
@@ -53,55 +58,81 @@
 - Embedding 模型硬件需求
    本项目中默认选用的 Embedding 模型 [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main) 约占用显存 3GB，也可修改为在 CPU 中运行。
 ### 软件需求
-本项目已在 python 3.8 环境下完成测试。
-### 1. 安装 python 依赖包
+本项目已在 Python 3.8，CUDA 11.7 环境下完成测试。
-```commandline
-pip install -r requirements.txt
+### 1. 安装环境
+- 环境检查
+```shell
+# 首先，确信你的机器安装了 Python 3.8 及以上版本
+$ python --version
+Python 3.8.13
+# 如果低于这个版本，可使用conda安装环境
+$ conda create -p /your_path/env_name python=3.8
+# 激活环境
+$ source activate /your_path/env_name
+# 关闭环境
+$ source deactivate /your_path/env_name
+# 删除环境
+$ conda env remove -p  /your_path/env_name
+```
+- 项目依赖
+```shell
+# 拉取仓库
+$ git clone https://github.com/imClumsyPanda/langchain-ChatGLM.git
+# 安装依赖
+$ pip install -r requirements.txt
 ```
 注：使用 langchain.document_loaders.UnstructuredFileLoader 进行非结构化文件接入时，可能需要依据文档进行其他依赖包的安装，请参考 [langchain 文档](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html)
-### 2. 执行脚本体验 Web UI 或命令行交互
+### 2. 设置模型默认参数
-执行 [webui.py](webui.py) 脚本体验 **Web 交互** <img src="https://img.shields.io/badge/Version-0.1-brightgreen">
-```commandline
+在开始执行 Web UI 或命令行交互前，请先检查 [configs/model_config.py](configs/model_config.py) 中的各项模型参数设置是否符合需求。
-python webui.py
+### 3. 执行脚本体验 Web UI 或命令行交互
+执行 [webui.py](webui.py) 脚本体验 **Web 交互**
+```shell
+$ python webui.py
 ```
+注：如未将模型下载至本地，请在执行前检查`$HOME/.cache/huggingface/`文件夹剩余空间，至少需要 15 GB。
 执行后效果如下图所示：
 ![webui](img/ui1.png)
-Web UI 中提供的 API 接口如下图所示：
-![webui](img/ui2.png)
 Web UI 可以实现如下功能：
-1. 自动读取`knowledge_based_chatglm.py`中`LLM`及`embedding`模型枚举，选择后点击`setting`进行模型加载，可随时切换模型进行测试
+1. 运行前自动读取`configs/model_config.py`中`LLM`及`Embedding`模型枚举及默认模型设置运行模型，如需重新加载模型，可在界面重新选择后点击`重新加载模型`进行模型加载；
 2. 可手动调节保留对话历史长度，可根据显存大小自行调节
-3. 添加上传文件功能，通过下拉框选择已上传的文件，点击`loading`加载文件，过程中可随时更换加载的文件
+3. 添加上传文件功能，通过下拉框选择已上传的文件，点击`加载文件`按钮，过程中可随时更换加载的文件
-4. 底部添加`use via API`可对接到自己系统
-或执行 [knowledge_based_chatglm.py](knowledge_based_chatglm.py) 脚本体验**命令行交互**
+或执行 [cli_demo.py](cli_demo.py) 脚本体验**命令行交互**
-```commandline
+```shell
-python knowledge_based_chatglm.py
+$ python cli_demo.py
 ```
 ### 常见问题
 Q1: 本项目支持哪些文件格式？
-A1: 目前已测试支持 txt、docx、md 格式文件，更多文件格式请参考 [langchain 文档](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html)。目前已知文档中若含有特殊字符，可能存在文件无法加载的问题。
+A1: 目前已测试支持 txt、docx、md、pdf 格式文件，更多文件格式请参考 [langchain 文档](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html)。目前已知文档中若含有特殊字符，可能存在文件无法加载的问题。
-Q2: 读取特定格式文件时遇到缺少`detectron2`时如何解决？
+Q3: 使用过程中 Python 包`nltk`发生了`Resource punkt not found.`报错，该如何解决？
-A2: 因该包安装过程中遇到问题较多，且仅部分格式文件需要，所以未加入`requirements.txt`。可以通过一下命令安装
-```commandline
-pip install "detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2"
-```
-Q3: `Resource punkt not found.` 如何解决？
 A3: https://github.com/nltk/nltk_data/raw/gh-pages/packages/tokenizers/punkt.zip 中的 `packages/tokenizers` 解压，放到  `nltk_data/tokenizers` 存储路径下。
 `nltk_data` 存储路径可以通过 `nltk.data.path` 查询。
-Q4: `Resource averaged_perceptron_tagger not found.` 如何解决？
+Q4: 使用过程中 Python 包`nltk`发生了`Resource averaged_perceptron_tagger not found.`报错，该如何解决？
 A4: 将 https://github.com/nltk/nltk_data/blob/gh-pages/packages/taggers/averaged_perceptron_tagger.zip 下载，解压放到 `nltk_data/taggers` 存储路径下。
@@ -111,6 +142,60 @@ Q5: 本项目可否在 colab 中运行？
 A5: 可以尝试使用 chatglm-6b-int4 模型在 colab 中运行，需要注意的是，如需在 colab 中运行 Web UI，需将`webui.py`中`demo.queue(concurrency_count=3).launch(
    server_name='0.0.0.0', share=False, inbrowser=False)`中参数`share`设置为`True`。
+Q6: 在 Anaconda 中使用 pip 安装包无效如何解决？
+A6: 此问题是系统环境问题，详细见  [在Anaconda中使用pip安装包无效问题](docs/在Anaconda中使用pip安装包无效问题.md)
+Q7: 本项目中所需模型如何下载至本地？
+A7: 本项目中使用的模型均为`huggingface.co`中可下载的开源模型，以默认选择的`chatglm-6b`和`text2vec-large-chinese`模型为例，下载模型可执行如下代码：
+```shell
+# 安装 git lfs
+$ git lfs install
+# 下载 LLM 模型
+$ git clone https://huggingface.co/THUDM/chatglm-6b /your_path/chatglm-6b
+# 下载 Embedding 模型
+$ git clone https://huggingface.co/GanymedeNil/text2vec-large-chinese /your_path/text2vec
+# 模型需要更新时，可打开模型所在文件夹后拉取最新模型文件/代码
+$ git pull
+```
+Q8: `huggingface.co`中模型下载速度较慢怎么办？
+A8: 可使用本项目用到的模型权重文件百度网盘地址：
+- ernie-3.0-base-zh.zip 链接: https://pan.baidu.com/s/1CIvKnD3qzE-orFouA8qvNQ?pwd=4wih
+- ernie-3.0-nano-zh.zip 链接: https://pan.baidu.com/s/1Fh8fgzVdavf5P1omAJJ-Zw?pwd=q6s5
+- text2vec-large-chinese.zip 链接: https://pan.baidu.com/s/1sMyPzBIXdEzHygftEoyBuA?pwd=4xs7
+- chatglm-6b-int4-qe.zip 链接: https://pan.baidu.com/s/1DDKMOMHtNZccOOBGWIOYww?pwd=22ji
+- chatglm-6b-int4.zip 链接: https://pan.baidu.com/s/1pvZ6pMzovjhkA6uPcRLuJA?pwd=3gjd
+- chatglm-6b.zip 链接: https://pan.baidu.com/s/1B-MpsVVs1GHhteVBetaquw?pwd=djay
+Q9: 下载完模型后，如何修改代码以执行本地模型？
+A9: 模型下载完成后，请在 [configs/model_config.py](configs/model_config.py) 文件中，对`embedding_model_dict`和`llm_model_dict`参数进行修改，如把`embedding_model_dict`从
+```
+embedding_model_dict = {
+    "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
+    "ernie-base": "nghuyong/ernie-3.0-base-zh",
+    "text2vec": "GanymedeNil/text2vec-large-chinese"
+}
+```
+修改为
+```
+embedding_model_dict = {
+                        "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
+                        "ernie-base": "nghuyong/ernie-3.0-base-zh",
+                        "text2vec": "/Users/liuqian/Downloads/ChatGLM-6B/text2vec-large-chinese"
+}
+```
 ## DEMO
 以问题`chatglm-6b 的局限性具体体现在哪里，如何实现改进`为例
@@ -148,15 +233,14 @@ A5: 可以尝试使用 chatglm-6b-int4 模型在 colab 中运行，需要注意
  - [x] .pdf(需要按照常见问题 Q2 中描述进行`detectron2`的安装)
  - [x] .docx
  - [x] .txt
+  - [ ] 搜索引擎与本地网页
 - [ ] 增加更多 LLM 模型支持
  - [x] THUDM/chatglm-6b
  - [x] THUDM/chatglm-6b-int4
  - [x] THUDM/chatglm-6b-int4-qe
 - [ ] 增加 Web UI DEMO
  - [x] 利用 gradio 实现 Web UI DEMO
-  - [ ] 添加模型加载进度条
+  - [x] 添加输出内容及错误提示
-  - [ ] 添加输出内容及错误提示
-  - [ ] 国际化语言切换
  - [ ] 引用标注
 - [ ] 利用 fastapi 实现 API 部署方式，并实现调用 API 的 web ui DEMO

--- a/README_en.md
+++ b/README_en.md
-# ChatGLM Application Based on Local Knowledge
+# ChatGLM Application with Local Knowledge Implementation
 ## Introduction
+[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white "langchain-chatglm")](https://t.me/+RjliQ3jnJ1YyN2E9)
 🌍 [_中文文档_](README.md)
-🤖️ A local knowledge based LLM Application with [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) and [langchain](https://github.com/hwchase17/langchain).
+🤖️ This is a ChatGLM application based on local knowledge, implemented using [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) and [langchain](https://github.com/hwchase17/langchain).
+💡 Inspired by [document.ai](https://github.com/GanymedeNil/document.ai) and [Alex Zhangji](https://github.com/AlexZhangji)'s [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216), this project establishes a local knowledge question-answering application using open-source models.
+✅ The embeddings used in this project are [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main), and the LLM is [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B). Relying on these models, this project enables the use of **open-source** models for **offline private deployment**.
+⛓️ The implementation principle of this project is illustrated in the figure below. The process includes loading files -> reading text -> text segmentation -> text vectorization -> question vectorization -> matching the top k most similar text vectors to the question vector -> adding the matched text to `prompt` along with the question as context -> submitting to `LLM` to generate an answer.
+![Implementation schematic diagram](img/langchain+chatglm.png)
+🚩 This project does not involve fine-tuning or training; however, fine-tuning or training can be employed to optimize the effectiveness of this project.
-💡 Inspired by [document.ai](https://github.com/GanymedeNil/document.ai) by [GanymedeNil](https://github.com/GanymedeNil) and [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216) by [AlexZhangji](https://github.com/AlexZhangji).
-✅ In this project, [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main) is used as Embedding Model，and [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) used as LLM。Based on those models，this project can be deployed **offline** with all **open source** models。
+## Changelog
-## Webui 
+**[2023/04/15]**
-![webui](./img/ui1.png)
-Click on steps 1-3 according to the above figure to complete the model loading, file loading, and viewing of dialogue history
-![webui](./img/ui2.png)
+   1. Refactored the project structure, keeping the command line demo [cli_demo.py](cli_demo.py) and the Web UI demo [webui.py](webui.py) in the root directory;
-Click on the Use via API at the bottom to view the API interface. Existing applications can be docked and called through post requests
+   2. Improved the Web UI: after launch it first loads the models according to the default options in [configs/model_config.py](configs/model_config.py), and it now shows error messages, etc.;
+   3. Updated the FAQ.
-### TODO
+**[2023/04/12]**
-[] Add Model Load progress bar
-[] Add output content and error prompts
-[] International language switching
-[] Reference annotation
-[] Add plugin system (can be used for basic LORA training, etc.)
-## Update
+   1. Replaced the sample files in the Web UI to avoid issues with unreadable files due to encoding problems in Ubuntu;
+   2. Replaced the prompt template in `knowledge_based_chatglm.py` to prevent confusion in the content returned by ChatGLM, which may arise from the prompt template containing Chinese and English bilingual text.
-**[2023/04/11]** 
+**[2023/04/11]**
-1. Add Webui V0.1 version and synchronize the updated content before the current day;
-2. Automatically read knowledge_ based_ Enumerate LLM and embedding models in chatglm.py, select and click 'setting' to load the model. You can switch models for testing at any time
+   1. Added Web UI V0.1 version (thanks to [@liangtongt](https://github.com/liangtongt));
-3. The length of the conversation history can be manually adjusted and can be adjusted according to the size of the video memory
+   2. Added Frequently Asked Questions in `README.md` (thanks to [@calcitem](https://github.com/calcitem) and [@bolongliu](https://github.com/bolongliu));
-4. Add the upload file function, select the uploaded file from the dropdown box, click loading to load the file, and the loaded file can be changed at any time during the process
+   3. Enhanced automatic detection for the availability of `cuda`, `mps`, and `cpu` for LLM and Embedding model running devices;
-5. Add use via API at the bottom to connect to your own system
+   4. Added a check for `filepath` in `knowledge_based_chatglm.py`. In addition to supporting single file import, it now supports a single folder path as input. After input, it will traverse each file in the folder and display a command-line message indicating the success of each file load.
+**[2023/04/09]**
+   1. Replaced the previously selected `ChatVectorDBChain` with `RetrievalQA` in `langchain`, effectively reducing the issue of stopping due to insufficient video memory after asking 2-3 times;
+   2. Added `EMBEDDING_MODEL`, `VECTOR_SEARCH_TOP_K`, `LLM_MODEL`, `LLM_HISTORY_LEN`, `REPLY_WITH_SOURCE` parameter value settings in `knowledge_based_chatglm.py`;
+   3. Added `chatglm-6b-int4` and `chatglm-6b-int4-qe`, which require less GPU memory, as LLM model options;
+   4. Corrected code errors in `README.md` (thanks to [@calcitem](https://github.com/calcitem)).
 **[2023/04/07]**
-1. Fix bug which costs twice gpu memory (Thanks to [@suc16](https://github.com/suc16) and [@myml](https://github.com/myml)).
-2. Add gpu memory clear function after each call of ChatGLM.
-3. Add `nghuyong/ernie-3.0-nano-zh` and `nghuyong/ernie-3.0-base-zh` as Embedding model alternatives，costing less gpu than `GanymedeNil/text2vec-large-chinese` (Thanks to [@lastrei](https://github.com/lastrei))
-**[2023/04/09]**
+   1. Resolved the issue of doubled video memory usage when loading the ChatGLM model (thanks to [@suc16](https://github.com/suc16) and [@myml](https://github.com/myml));
-1. Using `RetrievalQA` in `langchain` to replace the previously selected `ChatVectorDBChain`, the replacement can effectively solve the problem of program stopping after 2-3 questions due to insufficient gpu memory.
+   2. Added a mechanism to clear video memory;
-2. Add `EMBEDDING_MODEL`, `VECTOR_SEARCH_TOP_K`, `LLM_MODEL`, `LLM_HISTORY_LEN`, `REPLY_WITH_SOURCE` parameter value settings in `knowledge_based_chatglm.py`.
+   3. Added `nghuyong/ernie-3.0-nano-zh` and `nghuyong/ernie-3.0-base-zh` as Embedding model options, which consume less video memory resources than `GanymedeNil/text2vec-large-chinese` (thanks to [@lastrei](https://github.com/lastrei)).
-3. Add `chatglm-6b-int4`, `chatglm-6b-int4-qe` with smaller GPU memory requirements as LLM model alternatives.
-4. Correct code errors in `README.md` (Thanks to [@calcitem](https://github.com/calcitem)).
-## Usage
+## How to Use
 ### Hardware Requirements
- ChatGLM Hardware Requirements
+- ChatGLM-6B Model Hardware Requirements
+     | **Quantization Level** | **Minimum GPU Memory** (inference) | **Minimum GPU Memory** (efficient parameter fine-tuning) |
+     | -------------- | ------------------------- | --------------------------------- |
+     | FP16 (no quantization) | 13 GB | 14 GB |
+     | INT8 | 8 GB | 9 GB |
+     | INT4 | 6 GB | 7 GB |
-    | **Quantization Level** | **GPU Memory** |
+- Embedding Model Hardware Requirements
-    |------------------------|----------------|
-    | FP16（no quantization）  | 13 GB          |
-    | INT8                   | 10 GB          |
-    | INT4                   | 6 GB           |
- Embedding Hardware Requirements
-   The default Embedding model in this repo is [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main), 3GB GPU Memory required when running on GPU.
+     The default Embedding model [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main) in this project occupies around 3GB of video memory and can also be configured to run on a CPU.
 ### Software Requirements
-This repo has been tested in python 3.8 environment。
-### 1. install python packages
+This repository has been tested with Python 3.8 and CUDA 11.7 environments.
+### 1. Setting up the environment
+* Environment check
+```shell
+# First, make sure your machine has Python 3.8 or higher installed
+$ python --version
+Python 3.8.13
+# If your version is lower, you can use conda to install the environment
+$ conda create -p /your_path/env_name python=3.8
+# Activate the environment
+$ source activate /your_path/env_name
+# Deactivate the environment
+$ source deactivate /your_path/env_name
+# Remove the environment
+$ conda env remove -p  /your_path/env_name
+```
+* Project dependencies
+```shell
+# Clone the repository
+$ git clone https://github.com/imClumsyPanda/langchain-ChatGLM.git
+# Install dependencies
+$ pip install -r requirements.txt
+```
+Note: When using langchain.document_loaders.UnstructuredFileLoader for unstructured file integration, you may need to install other dependency packages according to the documentation. Please refer to [langchain documentation](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html).
+### 2. Run Scripts to Experience Web UI or Command Line Interaction
+Execute [webui.py](webui.py) script to experience **Web interaction** <img src="https://img.shields.io/badge/Version-0.1-brightgreen">
 ```commandline
-pip install -r requirements.txt
+python webui.py
 ```
-Attention: With langchain.document_loaders.UnstructuredFileLoader used to connect with local knowledge file, you may need some other dependencies as mentioned in  [langchain documentation](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html)
+Note: If the models have not been downloaded locally, make sure the `$HOME/.cache/huggingface/` folder has at least 15 GB of free space before executing.
+The resulting interface is shown below:
+![webui](img/ui1.png)
+The Web UI supports the following features:
+1. Automatically reads the `LLM` and `embedding` model enumerations in `configs/model_config.py`, allowing you to select and reload the model by clicking `重新加载模型`.
+2. The length of retained dialogue history can be manually adjusted according to the available video memory.
+3. Adds a file upload function. Select the uploaded file through the drop-down box, click `加载文件` to load the file, and change the loaded file at any time during the process.
+Alternatively, execute the [cli_demo.py](cli_demo.py) script to experience **command line interaction**:
-### 2. Run [knowledge_based_chatglm.py](knowledge_based_chatglm.py) script
 ```commandline
 python knowledge_based_chatglm.py
 ```
-### Known issues
- Currently tested to support txt, docx, md format files, for more file formats please refer to [langchain documentation](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html). If the document contains special characters, the file may not be correctly loaded.
- When running this project with macOS, it may not work properly due to incompatibility with pytorch caused by macOS version 13.3 and above.
 ### FAQ
-Q: How to solve `Resource punkt not found.`?
+Q1: What file formats does this project support?
-A: Unzip `packages/tokenizers` in https://github.com/nltk/nltk_data/raw/gh-pages/packages/tokenizers/punkt.zip and put it in the corresponding directory of `Searched in:`.
+A1: Currently, this project has been tested with txt, docx, and md file formats. For more file formats, please refer to the [langchain documentation](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html). It is known that if the document contains special characters, there might be issues with loading the file.
-Q: How to solve `Resource averaged_perceptron_tagger not found.`?
+Q2: How can I resolve the `detectron2` dependency issue when reading specific file formats?
-A: Download https://github.com/nltk/nltk_data/blob/gh-pages/packages/taggers/averaged_perceptron_tagger.zip, decompress it and put it in the corresponding directory of `Searched in:`.
+A2: As the installation process for this package can be problematic and it is only required for some file formats, it is not included in `requirements.txt`. You can install it with the following command:
-## Roadmap
+```commandline
+pip install "detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2"
+```
+Q3: How can I solve the `Resource punkt not found.` error?
+A3: Unzip the `packages/tokenizers` folder from https://github.com/nltk/nltk_data/raw/gh-pages/packages/tokenizers/punkt.zip, and place it in the `nltk_data/tokenizers` storage path.
+The `nltk_data` storage path can be found using `nltk.data.path`.
- [x] local knowledge based application with langchain + ChatGLM-6B
+Q4: How can I solve the `Resource averaged_perceptron_tagger not found.` error?
- [x] unstructured files loaded with langchain
- [ ] more different file format loaded with langchain
+A4: Download https://github.com/nltk/nltk_data/blob/gh-pages/packages/taggers/averaged_perceptron_tagger.zip, extract it, and place it in the `nltk_data/taggers` storage path.
- [ ] implement web ui DEMO with gradio/streamlit 
- [ ] implement API with fastapi，and web ui DEMO with API
+The `nltk_data` storage path can be found using `nltk.data.path`.
+Q5: Can this project run in Google Colab?
+A5: You can try running the chatglm-6b-int4 model in Google Colab. Please note that if you want to run the Web UI in Colab, you need to set the `share` parameter in `demo.queue(concurrency_count=3).launch(server_name='0.0.0.0', share=False, inbrowser=False)` to `True`.
+Q6: What should I do if installing packages with pip in Anaconda has no effect?
+A6: This issue is related to the system environment. For more details, please refer to [Issues with installing packages using pip in Anaconda](docs/Issue-with-Installing-Packages-Using-pip-in-Anaconda.md).
+## DEMO
+Using the question "What are the limitations of ChatGLM-6B and how can they be improved?" as an example:
+Without using langchain to access local documents:
+> ChatGLM-6B is a chatbot based on GPT-3, and its limitations are mainly reflected in the following aspects:
+>
+>   1. Language model accuracy: Although GPT-3 is a large language model with relatively high accuracy, it still has some limitations. For instance, it may not be able to accurately understand certain dialects, accents, or complex sentence structures.
+>   2. Completeness of the knowledge graph: GPT-3 can generate some knowledge graphs, but its completeness and accuracy may not be sufficient. For example, it may struggle to accurately understand specific domain expertise and terminology.
+>   3. Coherence of dialogue generation: While GPT-3 can generate somewhat coherent dialogues, they may not always be natural. For instance, it may generate false, incoherent dialogue or misinterpret the user's intent.
+>
+>   To improve ChatGLM-6B, consider the following aspects:
+>
+>   1. Enhance the accuracy of the language model: The accuracy of the language model can be improved by increasing the training data, using better language model architectures, and optimizing the model training process.
+>   2. Strengthen the integrity of the knowledge graph: The integrity of the knowledge graph can be enhanced by increasing its dimensions, employing better knowledge graph construction methods, and optimizing the knowledge graph's matching algorithm.
+>   3. Boost the coherence of dialogue generation: The coherence of dialogue generation can be improved by augmenting the context information of the dialogue generation model, utilizing better context information extraction methods, and optimizing the dialogue generation model's algorithm.
+>   4. Incorporate more human feedback: Human feedback can help ChatGLM-6B better understand users' intentions and needs, thereby improving the quality and accuracy of dialogue generation. More human feedback can be introduced by involving more human agents and using human feedback collectors.
+ChatGLM's answer after using LangChain to access the README.md file of the ChatGLM-6B project:
+>ChatGLM-6B is a deep learning-based natural language processing model that excels at answering questions. However, it also has some limitations, as follows:
+>1. Limited language understanding: ChatGLM-6B has been primarily trained on Chinese natural language, and its understanding of other languages may be limited.
+>2. Insufficient knowledge base: The training dataset of ChatGLM-6B contains only a Chinese corpus, so it may not be able to answer non-Chinese questions or queries in specific domains.
+>3. Limited data volume: ChatGLM-6B's training dataset has only a few million records, which may hinder its ability to answer very specific or complex questions.
+>
+>To improve ChatGLM-6B, consider the following aspects:
+>1. Expand language knowledge: Learn natural language processing techniques in other languages to broaden the model's language understanding capabilities.
+>2. Broaden the knowledge base: Collect more Chinese corpora or use datasets in other languages to expand the model's knowledge base.
+>3. Increase data volume: Use larger datasets to train ChatGLM-6B, which can improve the model's performance.
+>4. Introduce more evaluation metrics: Incorporate additional evaluation metrics to assess the model's performance, which can help identify the shortcomings and limitations of ChatGLM-6B.
+>5. Enhance the model architecture: Improve ChatGLM-6B's model architecture to boost its performance and capabilities. For example, employ larger neural networks or refined convolutional neural network structures.
+## Roadmap
+- [x] Implement LangChain + ChatGLM-6B for local knowledge application
+- [x] Unstructured file access based on langchain
+   - [x] .md
+   - [x] .pdf
+   - [x] .docx
+   - [x] .txt
+- [ ] Add support for more LLM models
+   - [x] THUDM/chatglm-6b
+   - [x] THUDM/chatglm-6b-int4
+   - [x] THUDM/chatglm-6b-int4-qe
+- [ ] Add Web UI DEMO
+   - [x] Implement Web UI DEMO using Gradio
+   - [x] Add output and error messages
+   - [ ] Citation callout
+- [ ] Use FastAPI to implement API deployment method and develop a Web UI DEMO for API calls
--- a/agent/__init__.py
+++ b/agent/__init__.py
+from .chatglm_with_shared_memory_openai_llm import *
\ No newline at end of file
--- a/chains/local_doc_qa.py
+++ b/chains/local_doc_qa.py
+from langchain.chains import RetrievalQA
+from langchain.prompts import PromptTemplate
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.document_loaders import UnstructuredFileLoader
+from models.chatglm_llm import ChatGLM
+import sentence_transformers
+import os
+from configs.model_config import *
+import datetime
+from typing import List
+# Number of top-k text chunks to return from the vector store
+# (used as the default `top_k` of LocalDocQA.init_cfg)
+VECTOR_SEARCH_TOP_K = 10
+# LLM input history length (used as the default `llm_history_len` of LocalDocQA.init_cfg)
+LLM_HISTORY_LEN = 3
+# Whether to show the reply together with source text from the input document
+REPLY_WITH_SOURCE = True
+class LocalDocQA:
+    llm: object = None
+    embeddings: object = None
+    def init_cfg(self,
+                 embedding_model: str = EMBEDDING_MODEL,
+                 embedding_device=EMBEDDING_DEVICE,
+                 llm_history_len: int = LLM_HISTORY_LEN,
+                 llm_model: str = LLM_MODEL,
+                 llm_device=LLM_DEVICE,
+                 top_k=VECTOR_SEARCH_TOP_K,
+                 ):
+        """Create and load the LLM and the embedding model, and store the retrieval top-k.
+
+        `llm_model` and `embedding_model` are keys into `llm_model_dict` and
+        `embedding_model_dict`; the mapped values are handed to the model loaders.
+        The results are stored on `self.llm`, `self.embeddings` and `self.top_k`.
+        """
+        # Publish the ChatGLM instance on `self` first, then configure it.
+        self.llm = ChatGLM()
+        chat_llm = self.llm
+        llm_location = llm_model_dict[llm_model]
+        chat_llm.load_model(model_name_or_path=llm_location, llm_device=llm_device)
+        chat_llm.history_len = llm_history_len
+        # Build the embeddings wrapper, then replace its client with a
+        # SentenceTransformer created for the requested device.
+        embedding_location = embedding_model_dict[embedding_model]
+        self.embeddings = HuggingFaceEmbeddings(model_name=embedding_location)
+        embedder = self.embeddings
+        embedder.client = sentence_transformers.SentenceTransformer(embedder.model_name,
+                                                                    device=embedding_device)
+        self.top_k = top_k
+    def init_knowledge_vector_store(self,
+                                    filepath: str or List[str]):
+        if isinstance(filepath, str):
+            if not os.path.exists(filepath):
+                print("路径不存在")
+                return None
+            elif os.path.isfile(filepath):
+                file = os.path.split(filepath)[-1]
+                try:
+                    loader = UnstructuredFileLoader(filepath, mode="elements")
+                    docs = loader.load()
+                    print(f"{file} 已成功加载")
+                except:
+                    print(f"{file} 未能成功加载")
+                    return None
+            elif os.path.isdir(filepath):
+                docs = []
+                for file in os.listdir(filepath):
+                    fullfilepath = os.path.join(filepath, file)
+                    try:
+                        loader = UnstructuredFileLoader(fullfilepath, mode="elements")
+                        docs += loader.load()
+                        print(f"{file} 已成功加载")
+                    except:
+                        print(f"{file} 未能成功加载")
+        else:
+            docs = []
+            for file in filepath:
+                try:
+                    loader = UnstructuredFileLoader(file, mode="elements")
+                    docs += loader.load()
+                    print(f"{file} 已成功加载")
+                except:
+                    print(f"{file} 未能成功加载")
+        vector_store = FAISS.from_documents(docs, self.embeddings)
+        vs_path = f"""./vector_store/{os.path.splitext(file)[0]}_FAISS_{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}"""
+        vector_store.save_local(vs_path)
+        return vs_path if len(docs)>0 else None
+    def get_knowledge_based_answer(self,
+                                   query,
+                                   vs_path,
+                                   chat_history=[], ):
+        prompt_template = """基于以下已知信息，简洁和专业的来回答用户的问题。
+    如果无法从中得到答案，请说 "根据已知信息无法回答该问题" 或 "没有提供足够的相关信息"，不允许在答案中添加编造成分，答案请使用中文。
+    已知内容:
+    {context}
+    问题:
+    {question}"""
+        prompt = PromptTemplate(
+            template=prompt_template,
+            input_variables=["context", "question"]
+        )
+        self.llm.history = chat_history
+        vector_store = FAISS.load_local(vs_path, self.embeddings)
+        knowledge_chain = RetrievalQA.from_llm(
+            llm=self.llm,
+            retriever=vector_store.as_retriever(search_kwargs={"k": self.top_k}),
+            prompt=prompt
+        )
+        knowledge_chain.combine_documents_chain.document_prompt = PromptTemplate(
+            input_variables=["page_content"], template="{page_content}"
+        )
+        knowledge_chain.return_source_documents = True
+        result = knowledge_chain({"query": query})
+        self.llm.history[-1][0] = query
+        return result, self.llm.history
--- a/cli_demo.py
+++ b/cli_demo.py
+from configs.model_config import *
+from chains.local_doc_qa import LocalDocQA
+# return top-k text chunk from vector store
+VECTOR_SEARCH_TOP_K = 10
+# LLM input history length
+LLM_HISTORY_LEN = 3
+# Show reply with source text from input document
+REPLY_WITH_SOURCE = True
+if __name__ == "__main__":
+    local_doc_qa = LocalDocQA()
+    local_doc_qa.init_cfg(llm_model=LLM_MODEL,
+                          embedding_model=EMBEDDING_MODEL,
+                          embedding_device=EMBEDDING_DEVICE,
+                          llm_history_len=LLM_HISTORY_LEN,
+                          top_k=VECTOR_SEARCH_TOP_K)
+    vs_path = None
+    while not vs_path:
+        filepath = input("Input your local knowledge file path 请输入本地知识文件路径：")
+        vs_path = local_doc_qa.init_knowledge_vector_store(filepath)
+    history = []
+    while True:
+        query = input("Input your question 请输入问题：")
+        resp, history = local_doc_qa.get_knowledge_based_answer(query=query,
+                                                                vs_path=vs_path,
+                                                                chat_history=history)
+        if REPLY_WITH_SOURCE:
+            print(resp)
+        else:
+            print(resp["result"])
--- a/configs/model_config.py
+++ b/configs/model_config.py
+import torch.cuda
+import torch.backends
+embedding_model_dict = {
+    "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
+    "ernie-base": "nghuyong/ernie-3.0-base-zh",
+    "text2vec": "GanymedeNil/text2vec-large-chinese",
+}
+# Embedding model name
+EMBEDDING_MODEL = "text2vec"
+# Embedding running device
+EMBEDDING_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
+# supported LLM models
+llm_model_dict = {
+    "chatglm-6b-int4-qe": "THUDM/chatglm-6b-int4-qe",
+    "chatglm-6b-int4": "THUDM/chatglm-6b-int4",
+    "chatglm-6b": "THUDM/chatglm-6b",
+}
+# LLM model name
+LLM_MODEL = "chatglm-6b"
+# LLM running device
+LLM_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
--- a/content/langchain-ChatGLM README.md
+++ b/content/langchain-ChatGLM README.md
--- a/content/state_of_the_search.txt
+++ b/content/state_of_the_search.txt
+ChatGPT是OpenAI开发的一个大型语言模型，可以提供各种主题的信息，
+# 如何向 ChatGPT 提问以获得高质量答案：提示技巧工程完全指南
+## 介绍
+我很高兴欢迎您阅读我的最新书籍《The Art of Asking ChatGPT for High-Quality Answers: A complete Guide to Prompt Engineering Techniques》。本书是一本全面指南，介绍了各种提示技术，用于从ChatGPT中生成高质量的答案。
+我们将探讨如何使用不同的提示工程技术来实现不同的目标。ChatGPT是一款最先进的语言模型，能够生成类似人类的文本。然而，理解如何正确地向ChatGPT提问以获得我们所需的高质量输出非常重要。而这正是本书的目的。
+无论您是普通人、研究人员、开发人员，还是只是想在自己的领域中将ChatGPT作为个人助手的人，本书都是为您编写的。我使用简单易懂的语言，提供实用的解释，并在每个提示技术中提供了示例和提示公式。通过本书，您将学习如何使用提示工程技术来控制ChatGPT的输出，并生成符合您特定需求的文本。
+在整本书中，我们还提供了如何结合不同的提示技术以实现更具体结果的示例。我希望您能像我写作时一样，享受阅读本书并从中获得知识。
+<div style="page-break-after:always;"></div>
+## 第一章：Prompt 工程技术简介
+什么是 Prompt 工程？ 
+Prompt 工程是创建提示或指导像 ChatGPT 这样的语言模型输出的过程。它允许用户控制模型的输出并生成符合其特定需求的文本。 
+ChatGPT 是一种先进的语言模型，能够生成类似于人类的文本。它建立在 Transformer 架构上，可以处理大量数据并生成高质量的文本。 
+然而，为了从 ChatGPT 中获得最佳结果，重要的是要了解如何正确地提示模型。 提示可以让用户控制模型的输出并生成相关、准确和高质量的文本。 在使用 ChatGPT 时，了解它的能力和限制非常重要。
+该模型能够生成类似于人类的文本，但如果没有适当的指导，它可能无法始终产生期望的输出。 
+这就是 Prompt 工程的作用，通过提供清晰而具体的指令，您可以引导模型的输出并确保其相关。
+**Prompt 公式是提示的特定格式，通常由三个主要元素组成：** 
+- 任务：对提示要求模型生成的内容进行清晰而简洁的陈述。 
+- 指令：在生成文本时模型应遵循的指令。 
+- 角色：模型在生成文本时应扮演的角色。 
+在本书中，我们将探讨可用于 ChatGPT 的各种 Prompt 工程技术。我们将讨论不同类型的提示，以及如何使用它们实现您想要的特定目标。
+<div style="page-break-after:always;"></div>
+## 第二章：指令提示技术
+现在，让我们开始探索“指令提示技术”，以及如何使用它从ChatGPT中生成高质量的文本。
+指令提示技术是通过为模型提供具体指令来引导ChatGPT的输出的一种方法。这种技术对于确保输出相关和高质量非常有用。
+要使用指令提示技术，您需要为模型提供清晰简洁的任务，以及具体的指令以供模型遵循。
+例如，如果您正在生成客户服务响应，您将提供任务，例如“生成响应客户查询”的指令，例如“响应应该专业且提供准确的信息”。
+提示公式：“按照以下指示生成[任务]：[指令]”
+示例：
+**生成客户服务响应：** 
+- 任务：生成响应客户查询 
+- 指令：响应应该专业且提供准确的信息 
+- 提示公式：“按照以下指示生成专业且准确的客户查询响应：响应应该专业且提供准确的信息。”
+**生成法律文件：** 
+- 任务：生成法律文件 
+- 指令：文件应符合相关法律法规 
+- 提示公式：“按照以下指示生成符合相关法律法规的法律文件：文件应符合相关法律法规。”
+使用指令提示技术时，重要的是要记住指令应该清晰具体。这将有助于确保输出相关和高质量。可以将指令提示技术与下一章节中解释的“角色提示”和“种子词提示”相结合，以增强ChatGPT的输出。
+<div style="page-break-after:always;"></div>
+## 第三章：角色提示
+角色提示技术是通过为ChatGPT指定一个特定的角色来引导其输出的一种方式。这种技术对于生成针对特定上下文或受众的文本非常有用。
+要使用角色提示技术，您需要为模型提供一个清晰具体的角色。
+例如，如果您正在生成客户服务回复，您可以提供一个角色，如“客户服务代表”。
+提示公式：“作为[角色]生成[任务]” 
+示例： 
+**生成客户服务回复：** 
+- 任务：生成对客户查询的回复 
+- 角色：客户服务代表 
+- 提示公式：“作为客户服务代表，生成对客户查询的回复。”
+**生成法律文件：** 
+- 任务：生成法律文件 
+- 角色：律师 
+- 提示公式：“作为律师，生成法律文件。”
+将角色提示技术与指令提示和种子词提示结合使用可以增强ChatGPT的输出。
+**下面是一个示例，展示了如何将指令提示、角色提示和种子词提示技术结合使用：**
+- 任务：为新智能手机生成产品描述 
+- 指令：描述应该是有信息量的，具有说服力，并突出智能手机的独特功能 
+- 角色：市场代表 种子词：“创新的” 
+- 提示公式：“作为市场代表，生成一个有信息量的、有说服力的产品描述，突出新智能手机的创新功能。该智能手机具有以下功能[插入您的功能]”
+在这个示例中，指令提示用于确保产品描述具有信息量和说服力。角色提示用于确保描述是从市场代表的角度书写的。而种子词提示则用于确保描述侧重于智能手机的创新功能。
+<div style="page-break-after:always;"></div>
+## 第四章：标准提示
+标准提示是一种简单的方法，通过为模型提供一个特定的任务来引导ChatGPT的输出。例如，如果您想生成一篇新闻文章的摘要，您可以提供一个任务，如“总结这篇新闻文章”。
+提示公式：“生成一个[任务]”
+例如： 
+**生成新闻文章的摘要：** 
+- 任务：总结这篇新闻文章 
+- 提示公式：“生成这篇新闻文章的摘要”
+**生成一篇产品评论：** 
+- 任务：为一款新智能手机撰写评论 
+- 提示公式：“生成这款新智能手机的评论”
+此外，标准提示可以与其他技术（如角色提示和种子词提示）结合使用，以增强ChatGPT的输出。
+**以下是如何将标准提示、角色提示和种子词提示技术结合使用的示例：** 
+- 任务：为一台新笔记本电脑撰写产品评论 
+- 说明：评论应客观、信息丰富，强调笔记本电脑的独特特点 
+- 角色：技术专家 
+- 种子词：“强大的” 
+- 提示公式：“作为一名技术专家，生成一个客观而且信息丰富的产品评论，强调新笔记本电脑的强大特点。”
+在这个示例中，标准提示技术用于确保模型生成产品评论。角色提示用于确保评论是从技术专家的角度写的。而种子词提示用于确保评论侧重于笔记本电脑的强大特点。
+<div style="page-break-after:always;"></div>
+## 第五章：零、一和少样本提示
+零样本、一样本和少样本提示是用于从ChatGPT生成文本的技术，最少或没有任何示例。当特定任务的数据有限或任务是新的且未定义时，这些技术非常有用。
+当任务没有可用的示例时，使用零样本提示技术。向模型提供一个通用任务，模型根据对任务的理解生成文本。 
+当任务只有一个示例可用时，使用一样本提示技术。向模型提供该示例，模型根据对示例的理解生成文本。 
+当任务只有有限数量的示例可用时，使用少样本提示技术。向模型提供这些示例，模型根据对示例的理解生成文本。 
+提示公式：“基于[数量]个示例生成文本” 
+例如： 
+**为没有可用示例的新产品编写产品描述：** 
+- 任务：为新的智能手表编写产品描述 
+- 提示公式：“基于零个示例为这款新智能手表生成产品描述” 
+**使用一个示例生成产品比较：** 
+- 任务：将新款智能手机与最新的iPhone进行比较 
+- 提示公式：“使用一个示例（最新的iPhone）为这款新智能手机生成产品比较” 
+**使用少量示例生成产品评论：** 
+- 任务：为新的电子阅读器撰写评论 
+- 提示公式：“使用少量示例（3个其他电子阅读器）为这款新电子阅读器生成评论” 
+这些技术可用于根据模型对任务或提供的示例的理解生成文本。
+<div style="page-break-after:always;"></div>
+## 第六章：“让我们思考一下”提示
+“让我们思考一下”提示是一种技巧，可鼓励ChatGPT生成反思和思考性的文本。这种技术适用于撰写论文、诗歌或创意写作等任务。 
+“让我们思考一下”提示的公式非常简单，即“让我们思考一下”后跟一个主题或问题。 
+例如： 
+**生成一篇反思性论文：** 
+- 任务：就个人成长主题写一篇反思性论文 
+- 提示公式：“让我们思考一下：个人成长” 
+**生成一首诗：** 
+- 任务：写一首关于季节变化的诗 
+- 提示公式：“让我们思考一下：季节变化” 
+这个提示要求对特定主题或想法展开对话或讨论。发言者邀请ChatGPT参与讨论相关主题。 
+模型提供了一个提示，作为对话或文本生成的起点。 
+然后，模型使用其训练数据和算法生成与提示相关的响应。这种技术允许ChatGPT根据提供的提示生成上下文适当且连贯的文本。 
+**要使用“让我们思考一下提示”技术与ChatGPT，您可以遵循以下步骤：** 
+1. 确定您要讨论的主题或想法。 
+2. 制定一个明确表达主题或想法的提示，并开始对话或文本生成。 
+3. 用“让我们思考”或“让我们讨论”开头的提示，表明您正在启动对话或讨论。 
+**以下是使用此技术的一些提示示例：** 
+- 提示：“让我们思考气候变化对农业的影响” 
+- 提示：“让我们讨论人工智能的当前状态” 
+- 提示：“让我们谈谈远程工作的好处和缺点” 您还可以添加开放式问题、陈述或一段您希望模型继续或扩展的文本。 
+提供提示后，模型将使用其训练数据和算法生成与提示相关的响应，并以连贯的方式继续对话。 
+这种独特的提示有助于ChatGPT以不同的视角和角度给出答案，从而产生更具动态性和信息性的段落。 
+使用提示的步骤简单易行，可以真正提高您的写作水平。尝试一下，看看效果如何吧。
+<div style="page-break-after:always;"></div>
+## 第七章：自洽提示
+自洽提示是一种技术，用于确保ChatGPT的输出与提供的输入一致。这种技术对于事实核查、数据验证或文本生成中的一致性检查等任务非常有用。
+自洽提示的提示公式是输入文本后跟着指令“请确保以下文本是自洽的”。
+或者，可以提示模型生成与提供的输入一致的文本。
+提示示例及其公式：
+**示例1：文本生成** 
+- 任务：生成产品评论 
+- 指令：评论应与输入中提供的产品信息一致 
+- 提示公式：“生成与以下产品信息一致的产品评论[插入产品信息]”
+**示例2：文本摘要** 
+- 任务：概括一篇新闻文章 
+- 指令：摘要应与文章中提供的信息一致 
+- 提示公式：“用与提供的信息一致的方式概括以下新闻文章[插入新闻文章]”
+**示例3：文本完成** 
+- 任务：完成一个句子 
+- 指令：完成应与输入中提供的上下文一致 
+- 提示公式：“以与提供的上下文一致的方式完成以下句子[插入句子]”
+**示例4：** 
+1. **事实核查：** 
+   任务：检查给定新闻文章的一致性 
+   输入文本：“文章中陈述该城市的人口为500万，但后来又说该城市的人口为700万。” 
+   提示公式：“请确保以下文本是自洽的：文章中陈述该城市的人口为500万，但后来又说该城市的人口为700万。”
+2. **数据验证：** 
+   任务：检查给定数据集的一致性 
+   输入文本：“数据显示7月份的平均温度为30度，但最低温度记录为20度。” 
+   提示公式：“请确保以下文本是自洽的：数据显示7月份的平均温度为30度，但最低温度记录为20度。”
+<div style="page-break-after:always;"></div>
+## 第八章：种子词提示
+种子词提示是一种通过提供特定的种子词或短语来控制ChatGPT输出的技术。种子词提示的提示公式是种子词或短语，后跟指令“请根据以下种子词生成文本”。
+示例：
+**文本生成：** 
+- 任务：编写一篇有关龙的故事 
+- 种子词：“龙” 
+- 提示公式：“请根据以下种子词生成文本：龙”
+**语言翻译：** 
+- 任务：将一句话从英语翻译成西班牙语 
+- 种子词：“你好” 
+- 提示公式：“请根据以下种子词生成文本：你好”
+这种技术允许模型生成与种子词相关的文本并对其进行扩展。这是一种控制模型生成文本与某个特定主题或背景相关的方式。
+种子词提示可以与角色提示和指令提示相结合，以创建更具体和有针对性的生成文本。通过提供种子词或短语，模型可以生成与该种子词或短语相关的文本，并通过提供有关期望输出和角色的信息，模型可以以特定于角色或指令的风格或语气生成文本。这样可以更好地控制生成的文本，并可用于各种应用程序。
+以下是提示示例及其公式：
+**示例1：文本生成** 
+- 任务：编写一首诗 
+- 指令：诗应与种子词“爱”相关，并以十四行诗的形式书写。 
+- 角色：诗人 
+- 提示公式：“作为诗人，根据以下种子词生成与“爱”相关的十四行诗：”
+**示例2：文本完成** 
+- 任务：完成一句话 
+- 指令：完成应与种子词“科学”相关，并以研究论文的形式书写。 
+- 角色：研究员 
+- 提示公式：“作为研究员，请在与种子词“科学”相关且以研究论文的形式书写的情况下完成以下句子：[插入句子]”
+**示例3：文本摘要** 
+- 任务：摘要一篇新闻文章 
+- 指令：摘要应与种子词“政治”相关，并以中立和公正的语气书写。 
+- 角色：记者 
+- 提示公式：“作为记者，请以中立和公正的语气摘要以下新闻文章，与种子词“政治”相关：[插入新闻文章]”
+<div style="page-break-after:always;"></div>
+## 第九章：知识生成提示
+知识生成提示是一种从ChatGPT中引出新的、原创的信息的技术。 
+知识生成提示的公式是“请生成关于X的新的和原创的信息”，其中X是感兴趣的主题。 
+这是一种利用模型预先存在的知识来生成新的信息或回答问题的技术。 
+要将此提示与ChatGPT一起使用，需要将问题或主题作为输入提供给模型，以及指定所生成文本的任务或目标的提示。
+提示应包括有关所需输出的信息，例如要生成的文本类型以及任何特定的要求或限制。
+以下是提示示例及其公式：
+**示例1：知识生成** 
+- 任务：生成有关特定主题的新信息 
+- 说明：生成的信息应准确且与主题相关 
+- 提示公式：“生成有关[特定主题]的新的准确信息”
+**示例2：问答** 
+- 任务：回答问题 
+- 说明：答案应准确且与问题相关 
+- 提示公式：“回答以下问题：[插入问题]”
+**示例3：知识整合** 
+- 任务：将新信息与现有知识整合 
+- 说明：整合应准确且与主题相关 
+- 提示公式：“将以下信息与有关[特定主题]的现有知识整合：[插入新信息]”
+**示例4：数据分析**
+- 任务：从给定的数据集中生成有关客户行为的见解 
+- 提示公式：“请从这个数据集中生成有关客户行为的新的和原创的信息”
+<div style="page-break-after:always;"></div>
+## 第十章：知识整合提示
+这种技术利用模型的现有知识来整合新信息或连接不同的信息片段。
+这种技术对于将现有知识与新信息相结合，以生成更全面的特定主题的理解非常有用。 
+**如何与ChatGPT一起使用：** 
+- 模型应该提供新信息和现有知识作为输入，以及指定生成文本的任务或目标的提示。
+- 提示应包括有关所需输出的信息，例如要生成的文本类型以及任何特定的要求或限制。 
+提示示例及其公式： 
+**示例1：知识整合** 
+- 任务：将新信息与现有知识整合 
+- 说明：整合应准确且与主题相关 
+- 提示公式：“将以下信息与关于[具体主题]的现有知识整合：[插入新信息]” 
+**示例2：连接信息片段** 
+- 任务：连接不同的信息片段 
+- 说明：连接应相关且逻辑清晰 
+- 提示公式：“以相关且逻辑清晰的方式连接以下信息片段：[插入信息1] [插入信息2]” 
+**示例3：更新现有知识** 
+- 任务：使用新信息更新现有知识 
+- 说明：更新的信息应准确且相关 
+- 提示公式：“使用以下信息更新[具体主题]的现有知识：[插入新信息]”
+<div style="page-break-after:always;"></div>
+## 第十一章：多项选择提示
+这种技术向模型提供一个问题或任务以及一组预定义的选项作为潜在答案。
+该技术对于生成仅限于特定选项集的文本非常有用，可用于问答、文本完成和其他任务。模型可以生成仅限于预定义选项的文本。
+要使用ChatGPT的多项选择提示，需要向模型提供一个问题或任务作为输入，以及一组预定义的选项作为潜在答案。提示还应包括有关所需输出的信息，例如要生成的文本类型以及任何特定要求或限制。
+提示示例及其公式：
+**示例1：问答**
+- 任务：回答一个多项选择题
+- 说明：答案应该是预定义的选项之一
+- 提示公式：“通过选择以下选项之一回答以下问题：[插入问题] [插入选项1] [插入选项2] [插入选项3]”
+**示例2：文本完成**
+- 任务：使用预定义选项之一完成句子
+- 说明：完成应该是预定义的选项之一
+- 提示公式：“通过选择以下选项之一完成以下句子：[插入句子] [插入选项1] [插入选项2] [插入选项3]”
+**示例3：情感分析**
+- 任务：将文本分类为积极、中立或消极
+- 说明：分类应该是预定义的选项之一
+- 提示公式：“通过选择以下选项之一，将以下文本分类为积极、中立或消极：[插入文本] [积极] [中立] [消极]”
+<div style="page-break-after:always;"></div>
+## 第十二章：可解释的软提示
+可解释的软提示是一种技术，可以在提供一定的灵活性的同时控制模型生成的文本。它通过提供一组受控输入和关于所需输出的附加信息来实现。这种技术可以生成更具解释性和可控性的生成文本。
+提示示例及其公式：
+**示例1：文本生成** 
+- 任务：生成一个故事 
+- 指令：故事应基于一组给定的角色和特定的主题 
+- 提示公式：“基于以下角色生成故事：[插入角色]和主题：[插入主题]”
+**示例2：文本完成** 
+- 任务：完成一句话 
+- 指令：完成应以特定作者的风格为基础 
+- 提示公式：“以[特定作者]的风格完成以下句子：[插入句子]”
+**示例3：语言建模** 
+- 任务：以特定风格生成文本 
+- 指令：文本应以特定时期的风格为基础 
+- 提示公式：“以[特定时期]的风格生成文本：[插入上下文]”
+<div style="page-break-after:always;"></div>
+## 第十三章：控制生成提示
+控制生成提示是一种技术，可让模型在生成文本时对输出进行高度控制。
+这可以通过提供一组特定的输入来实现，例如模板、特定词汇或一组约束条件，这些输入可用于指导生成过程。
+以下是一些示例和它们的公式：
+**示例1：文本生成** 
+- 任务：生成一个故事 
+- 说明：该故事应基于特定的模板 
+- 提示公式：“根据以下模板生成故事：[插入模板]”
+**示例2：文本补全** 
+- 任务：完成一句话 
+- 说明：完成应使用特定的词汇 
+- 提示公式：“使用以下词汇完成以下句子：[插入词汇]：[插入句子]”
+**示例3：语言建模** 
+- 任务：以特定风格生成文本 
+- 说明：文本应遵循一组特定的语法规则 
+- 提示公式：“生成遵循以下语法规则的文本：[插入规则]：[插入上下文]”
+通过提供一组特定的输入来指导生成过程，控制生成提示使得生成的文本更具可控性和可预测性。
+<div style="page-break-after:always;"></div>
+## 第十四章：问答提示
+问答提示是一种技术，可以让模型生成回答特定问题或任务的文本。通过将问题或任务与可能与问题或任务相关的任何其他信息一起作为输入提供给模型来实现此目的。 
+一些提示示例及其公式如下： 
+**示例1：事实问题回答** 
+- 任务：回答一个事实性问题 
+- 说明：答案应准确且相关 
+- 提示公式：“回答以下事实问题：[插入问题]” 
+**示例2：定义** 
+- 任务：提供一个词的定义 
+- 说明：定义应准确 
+- 提示公式：“定义以下词汇：[插入单词]” 
+**示例3：信息检索** 
+- 任务：从特定来源检索信息 
+- 说明：检索到的信息应相关 
+- 提示公式：“从以下来源检索有关[特定主题]的信息：[插入来源]” 这对于问答和信息检索等任务非常有用。
+<div style="page-break-after:always;"></div>
+## 第十五章：概述提示
+概述提示是一种技术，允许模型在保留其主要思想和信息的同时生成给定文本的较短版本。
+这可以通过将较长的文本作为输入提供给模型并要求其生成该文本的摘要来实现。
+这种技术对于文本概述和信息压缩等任务非常有用。 
+**如何在ChatGPT中使用：** 
+- 应该向模型提供较长的文本作为输入，并要求其生成该文本的摘要。
+- 提示还应包括有关所需输出的信息，例如摘要的所需长度和任何特定要求或限制。 
+提示示例及其公式： 
+**示例1：文章概述** 
+- 任务：概述新闻文章 
+- 说明：摘要应是文章主要观点的简要概述 
+- 提示公式：“用一句简短的话概括以下新闻文章：[插入文章]” 
+**示例2：会议记录** 
+- 任务：概括会议记录 
+- 说明：摘要应突出会议的主要决策和行动 
+- 提示公式：“通过列出主要决策和行动来总结以下会议记录：[插入记录]” 
+**示例3：书籍摘要** 
+- 任务：总结一本书 
+- 说明：摘要应是书的主要观点的简要概述 
+- 提示公式：“用一段简短的段落总结以下书籍：[插入书名]”
+<div style="page-break-after:always;"></div>
+## 第十六章：对话提示
+对话提示是一种技术，允许模型生成模拟两个或更多实体之间对话的文本。通过为模型提供一个上下文和一组角色或实体，以及它们的角色和背景，并要求模型在它们之间生成对话。
+因此，应为模型提供上下文和一组角色或实体，以及它们的角色和背景。还应向模型提供有关所需输出的信息，例如对话或交谈的类型以及任何特定的要求或限制。
+提示示例及其公式： 
+**示例1：对话生成** 
+- 任务：生成两个角色之间的对话 
+- 说明：对话应自然且与给定上下文相关 
+- 提示公式：“在以下情境中生成以下角色之间的对话[插入角色]”
+**示例2：故事写作** 
+- 任务：在故事中生成对话 
+- 说明：对话应与故事的角色和事件一致 
+- 提示公式：“在以下故事中生成以下角色之间的对话[插入故事]”
+**示例3：聊天机器人开发** 
+- 任务：为客服聊天机器人生成对话 
+- 说明：对话应专业且提供准确的信息 
+- 提示公式：“在客户询问[插入主题]时，为客服聊天机器人生成专业和准确的对话”
+因此，这种技术对于对话生成、故事写作和聊天机器人开发等任务非常有用。
+<div style="page-break-after:always;"></div>
+## 第十七章：对抗性提示
+对抗性提示是一种技术，它允许模型生成抵抗某些类型的攻击或偏见的文本。这种技术可用于训练更为稳健和抵抗某些类型攻击或偏见的模型。
+要在ChatGPT中使用对抗性提示，需要为模型提供一个提示，该提示旨在使模型难以生成符合期望输出的文本。提示还应包括有关所需输出的信息，例如要生成的文本类型和任何特定要求或约束。
+提示示例及其公式： 
+**示例1：用于文本分类的对抗性提示** 
+- 任务：生成被分类为特定标签的文本 
+- 说明：生成的文本应难以分类为特定标签 
+- 提示公式：“生成难以分类为[插入标签]的文本”
+**示例2：用于情感分析的对抗性提示** 
+- 任务：生成难以分类为特定情感的文本 
+- 说明：生成的文本应难以分类为特定情感 
+- 提示公式：“生成难以分类为具有[插入情感]情感的文本”
+**示例3：用于语言翻译的对抗性提示** 
+- 任务：生成难以翻译的文本 
+- 说明：生成的文本应难以翻译为目标语言 
+- 提示公式：“生成难以翻译为[插入目标语言]的文本”
+<div style="page-break-after:always;"></div>
+## 第十八章：聚类提示
+聚类提示是一种技术，它可以让模型根据某些特征或特点将相似的数据点分组在一起。
+通过提供一组数据点并要求模型根据某些特征或特点将它们分组成簇，可以实现这一目标。
+这种技术在数据分析、机器学习和自然语言处理等任务中非常有用。
+**如何在ChatGPT中使用：**
+应该向模型提供一组数据点，并要求它根据某些特征或特点将它们分组成簇。提示还应包括有关所需输出的信息，例如要生成的簇数和任何特定的要求或约束。
+提示示例及其公式：
+**示例1：客户评论的聚类**
+- 任务：将相似的客户评论分组在一起
+- 说明：应根据情感将评论分组
+- 提示公式：“将以下客户评论根据情感分组成簇：[插入评论]”
+**示例2：新闻文章的聚类**
+- 任务：将相似的新闻文章分组在一起
+- 说明：应根据主题将文章分组
+- 提示公式：“将以下新闻文章根据主题分组成簇：[插入文章]”
+**示例3：科学论文的聚类**
+- 任务：将相似的科学论文分组在一起
+- 说明：应根据研究领域将论文分组
+- 提示公式：“将以下科学论文根据研究领域分组成簇：[插入论文]”
+<div style="page-break-after:always;"></div>
+## 第十九章：强化学习提示
+强化学习提示是一种技术，可以使模型从过去的行动中学习，并随着时间的推移提高其性能。要在ChatGPT中使用强化学习提示，需要为模型提供一组输入和奖励，并允许其根据接收到的奖励调整其行为。提示还应包括有关期望输出的信息，例如要完成的任务以及任何特定要求或限制。这种技术对于决策制定、游戏玩法和自然语言生成等任务非常有用。
+提示示例及其公式：
+**示例1：用于文本生成的强化学习**
+- 任务：生成与特定风格一致的文本
+- 说明：模型应根据为生成与特定风格一致的文本而接收到的奖励来调整其行为
+- 提示公式：“使用强化学习来生成与以下风格一致的文本[插入风格]”
+**示例2：用于语言翻译的强化学习**
+- 任务：将文本从一种语言翻译成另一种语言
+- 说明：模型应根据为生成准确翻译而接收到的奖励来调整其行为
+- 提示公式：“使用强化学习将以下文本[插入文本]从[插入语言]翻译成[插入语言]”
+**示例3：用于问答的强化学习**
+- 任务：回答问题
+- 说明：模型应根据为生成准确答案而接收到的奖励来调整其行为
+- 提示公式：“使用强化学习来回答以下问题[插入问题]”
+<div style="page-break-after:always;"></div>
+## 第二十章：课程学习提示
+课程学习是一种技术，允许模型通过先训练简单任务，逐渐增加难度来学习复杂任务。 
+要在ChatGPT中使用课程学习提示，模型应该提供一系列任务，这些任务逐渐增加难度。
+提示还应包括有关期望输出的信息，例如要完成的最终任务以及任何特定要求或约束条件。 
+此技术对自然语言处理、图像识别和机器学习等任务非常有用。 
+提示示例及其公式： 
+**示例1：用于文本生成的课程学习** 
+- 任务：生成与特定风格一致的文本 
+- 说明：模型应该在移动到更复杂的风格之前先在简单的风格上进行训练。 
+- 提示公式：“使用课程学习来生成与以下风格[插入风格]一致的文本，按照以下顺序[插入顺序]。” 
+**示例2：用于语言翻译的课程学习** 
+- 任务：将文本从一种语言翻译成另一种语言 
+- 说明：模型应该在移动到更复杂的语言之前先在简单的语言上进行训练。 
+- 提示公式：“使用课程学习将以下语言[插入语言]的文本翻译成以下顺序[插入顺序]。” 
+**示例3：用于问题回答的课程学习** 
+- 任务：回答问题 
+- 说明：模型应该在移动到更复杂的问题之前先在简单的问题上进行训练。 
+- 提示公式：“使用课程学习来回答以下问题[插入问题]，按照以下顺序[插入顺序]生成答案。”
+<div style="page-break-after:always;"></div>
+## 第二十一章：情感分析提示
+情感分析是一种技术，允许模型确定文本的情绪色彩或态度，例如它是积极的、消极的还是中立的。
+要在ChatGPT中使用情感分析提示，模型应该提供一段文本并要求根据其情感分类。
+提示还应包括关于所需输出的信息，例如要检测的情感类型（例如积极的、消极的、中立的）和任何特定要求或约束条件。
+提示示例及其公式：
+**示例1：客户评论的情感分析**
+- 任务：确定客户评论的情感
+- 说明：模型应该将评论分类为积极的、消极的或中立的
+- 提示公式：“对以下客户评论进行情感分析[插入评论]，并将它们分类为积极的、消极的或中立的。”
+**示例2：推文的情感分析**
+- 任务：确定推文的情感
+- 说明：模型应该将推文分类为积极的、消极的或中立的
+- 提示公式：“对以下推文进行情感分析[插入推文]，并将它们分类为积极的、消极的或中立的。”
+**示例3：产品评论的情感分析**
+- 任务：确定产品评论的情感
+- 说明：模型应该将评论分类为积极的、消极的或中立的
+- 提示公式：“对以下产品评论进行情感分析[插入评论]，并将它们分类为积极的、消极的或中立的。”
+这种技术对自然语言处理、客户服务和市场研究等任务非常有用。
+<div style="page-break-after:always;"></div>
+## 第二十二章：命名实体识别提示
+命名实体识别（NER）是一种技术，它可以使模型识别和分类文本中的命名实体，例如人名、组织机构、地点和日期等。
+要在ChatGPT中使用命名实体识别提示，需要向模型提供一段文本，并要求它识别和分类文本中的命名实体。
+提示还应包括有关所需输出的信息，例如要识别的命名实体类型（例如人名、组织机构、地点、日期）以及任何特定要求或约束条件。
+提示示例及其公式：
+**示例1：新闻文章中的命名实体识别** 
+- 任务：在新闻文章中识别和分类命名实体 
+- 说明：模型应识别和分类人名、组织机构、地点和日期 
+- 提示公式：“在以下新闻文章[插入文章]上执行命名实体识别，并识别和分类人名、组织机构、地点和日期。”
+**示例2：法律文件中的命名实体识别** 
+- 任务：在法律文件中识别和分类命名实体 
+- 说明：模型应识别和分类人名、组织机构、地点和日期 
+- 提示公式：“在以下法律文件[插入文件]上执行命名实体识别，并识别和分类人名、组织机构、地点和日期。”
+**示例3：研究论文中的命名实体识别** 
+- 任务：在研究论文中识别和分类命名实体 
+- 说明：模型应识别和分类人名、组织机构、地点和日期 
+- 提示公式：“在以下研究论文[插入论文]上执行命名实体识别，并识别和分类人名、组织机构、地点和日期。”
+<div style="page-break-after:always;"></div>
+## 第二十三章：文本分类提示
+文本分类是一种技术，它可以让模型将文本分成不同的类别。这种技术对于自然语言处理、文本分析和情感分析等任务非常有用。
+需要注意的是，文本分类和情感分析是不同的。情感分析特别关注于确定文本中表达的情感或情绪。这可能包括确定文本表达了积极、消极还是中性的情感。情感分析通常用于客户评论、社交媒体帖子和其他需要表达情感的文本。
+要在ChatGPT中使用文本分类提示，模型需要提供一段文本，并要求它根据预定义的类别或标签进行分类。提示还应包括有关所需输出的信息，例如类别或标签的数量以及任何特定的要求或约束。
+提示示例及其公式：
+**示例1：对客户评论进行文本分类** 
+- 任务：将客户评论分类为不同的类别，例如电子产品、服装和家具 
+- 说明：模型应根据评论的内容对其进行分类 
+- 提示公式：“对以下客户评论 [插入评论] 进行文本分类，并根据其内容将其分类为不同的类别，例如电子产品、服装和家具。”
+**示例2：对新闻文章进行文本分类** 
+- 任务：将新闻文章分类为不同的类别，例如体育、政治和娱乐 
+- 说明：模型应根据文章的内容对其进行分类 
+- 提示公式：“对以下新闻文章 [插入文章] 进行文本分类，并根据其内容将其分类为不同的类别，例如体育、政治和娱乐。”
+**示例3：对电子邮件进行文本分类** 
+- 任务：将电子邮件分类为不同的类别，例如垃圾邮件、重要邮件或紧急邮件 
+- 说明：模型应根据电子邮件的内容和发件人对其进行分类 
+- 提示公式：“对以下电子邮件 [插入电子邮件] 进行文本分类，并根据其内容和发件人将其分类为不同的类别，例如垃圾邮件、重要邮件或紧急邮件。”
+<div style="page-break-after:always;"></div>
+## 第二十四章：文本生成提示
+文本生成提示与本书中提到的其他提示技术相关，例如：零、一、几次提示，受控生成提示，翻译提示，语言建模提示，句子补全提示等。这些提示都与生成文本有关，但它们在生成文本的方式和放置在生成文本上的特定要求或限制方面有所不同。文本生成提示可用于微调预训练模型或训练新模型以执行特定任务。
+提示示例及其公式： 
+**示例1：故事创作的文本生成** 
+- 任务：根据给定的提示生成故事 
+- 说明：故事应至少包含1000个单词，并包括一组特定的角色和情节。 
+- 提示公式：“根据以下提示[插入提示]生成一个至少包含1000个单词，包括角色[插入角色]和情节[插入情节]的故事。”
+**示例2：语言翻译的文本生成** 
+- 任务：将给定的文本翻译成另一种语言 
+- 说明：翻译应准确并符合习惯用语。 
+- 提示公式：“将以下文本[插入文本]翻译成[插入目标语言]，并确保其准确且符合习惯用语。”
+**示例3：文本完成的文本生成** 
+- 任务：完成给定的文本 
+- 说明：生成的文本应与输入文本连贯一致。 
+- 提示公式：“完成以下文本[插入文本]，并确保其连贯一致且符合输入文本。”
+<div style="page-break-after:always;"></div>
+## 结语
+正如本书中所探讨的那样，提示工程（Prompt Engineering）是一种利用像ChatGPT这样的语言模型获得高质量答案的强大工具。通过精心设计各种技巧的提示，我们可以引导模型生成符合我们特定需求和要求的文本。
+在第二章中，我们讨论了如何使用指令提示向模型提供清晰明确的指导。在第三章中，我们探讨了如何使用角色提示生成特定的语音或风格的文本。在第四章中，我们研究了如何使用标准提示作为微调模型性能的起点。我们还研究了几种高级提示技术，例如Zero、One和Few Shot Prompting、Self-Consistency、Seed-word Prompt、Knowledge Generation Prompt、Knowledge Integration prompts、Multiple Choice prompts、Interpretable Soft Prompts、Controlled generation prompts、Question-answering prompts、Summarization prompts、Dialogue prompts、Adversarial prompts、Clustering prompts、Reinforcement learning prompts、Curriculum learning prompts、Sentiment analysis prompts、Named entity recognition prompts和Text classification prompts（对应章节的名字）。
+这些技术中的每一种都可以以不同的方式使用，以实现各种不同的结果。随着您继续使用ChatGPT和其他语言模型，值得尝试不同的技巧组合，以找到最适合您特定用例的方法。
+最后，您可以查看我写的其他主题的书籍。
+感谢您阅读整本书。期待在我的其他书中与您见面。
+(本文翻译自《The Art of Asking ChatGPT for High-Quality Answers A Complete Guide to Prompt Engineering Techniques》这本书，本文的翻译全部由ChatGPT完成，我只是把翻译内容给稍微排版了一下。做完了才发现这个工作早就有人做过了...下面是我以此事件让New Bing编写的一个小故事，希望大家喜欢)
+> 他终于画完了他的画，心满意足地把它挂在了墙上。他觉得这是他一生中最伟大的作品，无人能及。他邀请了所有的朋友来欣赏，期待着他们的赞美和惊叹。 可是当他们看到画时，却没有一个人说话。他们只是互相对视，然后低头咳嗽，或者假装看手机。他感到很奇怪，难道他们都不懂艺术吗？难道他们都没有眼光吗？ 他忍不住问其中一个朋友：“你觉得我的画怎么样？” 朋友犹豫了一下，说：“嗯……其实……这个画……我以前在哪里见过。” “见过？你在哪里见过？”他惊讶地问。 “就在……就在那边啊。”朋友指了指墙角的一个小框架，“那不就是你上个月买回来的那幅名画吗？你怎么把它照抄了一遍？                                                             ——New Bing
+[这就是那幅名画]: http://yesaiwen.com/art-of-asking-chatgpt-for-high-quality-answ-engineering-techniques/#i-3	"《如何向ChatGPT提问并获得高质量的答案》"
\ No newline at end of file
--- a/docs/Issue-with-Installing-Packages-Using-pip-in-Anaconda.md
+++ b/docs/Issue-with-Installing-Packages-Using-pip-in-Anaconda.md
+## Issue with Installing Packages Using pip in Anaconda
+## Problem
+Recently, when running open-source code, I encountered an issue: after creating a virtual environment with conda and switching to the new environment, using pip to install packages would be "ineffective." Here, "ineffective" means that the packages installed with pip are not in this new environment.
+------
+## Analysis
+1. First, create a test environment called test: `conda create -n test`
+2. Activate the test environment: `conda activate test`
+3. Use pip to install numpy: `pip install numpy`. You'll find that numpy already exists in the default environment.
+```powershell
+Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
+Requirement already satisfied: numpy in c:\programdata\anaconda3\lib\site-packages (1.20.3)
+```
+4. Check the information of pip: `pip show pip`
+```powershell
+Name: pip
+Version: 21.2.4
+Summary: The PyPA recommended tool for installing Python packages.
+Home-page: https://pip.pypa.io/
+Author: The pip developers
+Author-email: distutils-sig@python.org
+License: MIT
+Location: c:\programdata\anaconda3\lib\site-packages
+Requires:
+Required-by:
+```
+5. We can see that the current pip is in the default conda environment. This explains why the package is not in the new virtual environment when we directly use pip to install packages - because the pip being used belongs to the default environment, the installed package either already exists or is installed directly into the default environment.
+------
+## Solution
+1. We can directly use the conda command to install new packages, but sometimes conda may not have certain packages/libraries, so we still need to use pip to install.
+2. We can first use the conda command to install the pip package for the current virtual environment, and then use pip to install new packages.
+```powershell
+# Use conda to install the pip package
+(test) PS C:\Users\Administrator> conda install pip
+Collecting package metadata (current_repodata.json): done
+Solving environment: done
+....
+done
+# Display the information of the current pip, and find that pip is in the test environment
+(test) PS C:\Users\Administrator> pip show pip
+Name: pip
+Version: 21.2.4
+Summary: The PyPA recommended tool for installing Python packages.
+Home-page: https://pip.pypa.io/
+Author: The pip developers
+Author-email: distutils-sig@python.org
+License: MIT
+Location: c:\programdata\anaconda3\envs\test\lib\site-packages
+Requires:
+Required-by:
+# Now use pip to install the numpy package, and it is installed successfully
+(test) PS C:\Users\Administrator> pip install numpy
+Looking in indexes: 
+https://pypi.tuna.tsinghua.edu.cn/simple
+Collecting numpy
+  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/4b/23/140ec5a509d992fe39db17200e96c00fd29603c1531ce633ef93dbad5e9e/numpy-1.22.2-cp39-cp39-win_amd64.whl (14.7 MB)
+Installing collected packages: numpy
+Successfully installed numpy-1.22.2
+# Use pip list to view the currently installed packages, no problem
+(test) PS C:\Users\Administrator> pip list
+Package      Version
+------------ ---------
+certifi      2021.10.8
+numpy        1.22.2
+pip          21.2.4
+setuptools   58.0.4
+wheel        0.37.1
+wincertstore 0.2
+```
+## Supplement
+1. The reason I didn't notice this problem before might be because the packages installed in the virtual environment were of a specific version, which overwrote the packages in the default environment. The main issue was actually a lack of careful observation:), otherwise, I could have noticed `Successfully uninstalled numpy-xxx` **default version** and `Successfully installed numpy-1.20.3` **specified version**.
+2. During testing, I found that if the Python version is specified when creating a new environment, this issue should not occur. I guess this is because pip is then installed into the virtual environment, whereas in our case no packages at all (not even pip) were installed, so the default environment's pip was used.
+3. There's a question: I should have specified the Python version when creating a new virtual environment before, but I still used the default environment's pip package. However, I just couldn't reproduce the issue successfully on two different machines, which led to the second point mentioned above.
+4. After encountering the problem mentioned in point 3, I solved it by using `python -m pip install package-name`, adding `python -m` before pip. As for why, you can refer to the answer on [StackOverflow](https://stackoverflow.com/questions/41060382/using-pip-to-install-packages-to-anaconda-environment):
+>1. If you have a non-conda pip as your default pip but conda python as your default python (as below):
+>
+>```shell
+>>which -a pip
+>/home/<user>/.local/bin/pip   
+>/home/<user>/.conda/envs/newenv/bin/pip
+>/usr/bin/pip
+>
+>>which -a python
+>/home/<user>/.conda/envs/newenv/bin/python
+>/usr/bin/python
+>```
+>
+>2. Then, instead of calling `pip install <package>` directly, you can use the module flag -m in python so that it installs with the anaconda python
+>
+>```shell
+>python -m pip install <package>
+>```
+>
+>3. This will install the package to the anaconda library directory rather than the library directory associated with the (non-anaconda) pip
+>4. The reason for doing this is as follows: the pip command references a specific pip file/shortcut (which -a pip will tell you which one). Similarly, the python command references a specific python file (which -a python will tell you which one). For one reason or another, these two commands can become out of sync, so your "default" pip is in a different folder than your default python and therefore is associated with different versions of python.
+>5. In contrast, the python -m pip construct does not use the shortcut that the pip command points to. Instead, it asks python to find its pip version and use that version to install a package.
\ No newline at end of file
--- a/docs/在Anaconda中使用pip安装包无效问题.md
+++ b/docs/在Anaconda中使用pip安装包无效问题.md
+##  在 Anaconda 中使用 pip 安装包无效问题
+##  在 Anaconda 中使用 pip 安装包无效问题
+##  问题
+最近在跑开源代码的时候遇到的问题：使用 conda 创建虚拟环境并切换到新的虚拟环境后，再使用 pip 来安装包会“无效”。这里的“无效”指的是使用 pip 安装的包不在这个新的环境中。
+------
+## 分析
+1、首先创建一个测试环境 test，`conda create -n test`
+2、激活该测试环境，`conda activate test`
+3、使用 pip 安装 numpy，`pip install numpy`，会发现 numpy 已经存在默认的环境中
+```powershell
+Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
+Requirement already satisfied: numpy in c:\programdata\anaconda3\lib\site-packages (1.20.3)
+```
+4、这时候看一下 pip 的信息，`pip show pip`
+```powershell
+Name: pip
+Version: 21.2.4
+Summary: The PyPA recommended tool for installing Python packages.
+Home-page: https://pip.pypa.io/
+Author: The pip developers
+Author-email: distutils-sig@python.org
+License: MIT
+Location: c:\programdata\anaconda3\lib\site-packages
+Requires:
+Required-by:
+```
+5、可以发现当前 pip 是在默认的 conda 环境中。这也就解释了当我们直接使用 pip 安装包时为什么包不在这个新的虚拟环境中，因为使用的 pip 属于默认环境，安装的包要么已经存在，要么直接装到默认环境中去了。
+------
+## 解决
+1、我们可以直接使用 conda 命令安装新的包，但有些时候 conda 可能没有某些包/库，所以还是得用 pip 安装
+2、我们可以先使用 conda 命令为当前虚拟环境安装 pip 包，再使用 pip 安装新的包
+```powershell
+# 使用 conda 安装 pip 包
+(test) PS C:\Users\Administrator> conda install pip
+Collecting package metadata (current_repodata.json): done
+Solving environment: done
+....
+done
+# 显示当前 pip 的信息，发现 pip 在测试环境 test 中
+(test) PS C:\Users\Administrator> pip show pip
+Name: pip
+Version: 21.2.4
+Summary: The PyPA recommended tool for installing Python packages.
+Home-page: https://pip.pypa.io/
+Author: The pip developers
+Author-email: distutils-sig@python.org
+License: MIT
+Location: c:\programdata\anaconda3\envs\test\lib\site-packages
+Requires:
+Required-by:
+# 再使用 pip 安装 numpy 包，成功安装
+(test) PS C:\Users\Administrator> pip install numpy
+Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
+Collecting numpy
+  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/4b/23/140ec5a509d992fe39db17200e96c00fd29603c1531ce633ef93dbad5e9e/numpy-1.22.2-cp39-cp39-win_amd64.whl (14.7 MB)
+Installing collected packages: numpy
+Successfully installed numpy-1.22.2
+# 使用 pip list 查看当前安装的包，没有问题
+(test) PS C:\Users\Administrator> pip list
+Package      Version
+------------ ---------
+certifi      2021.10.8
+numpy        1.22.2
+pip          21.2.4
+setuptools   58.0.4
+wheel        0.37.1
+wincertstore 0.2
+```
+------
+## 补充
+1、之前没有发现这个问题可能是因为在虚拟环境中安装的包是指定版本的，覆盖了默认环境中的包。其实主要还是观察不仔细：），不然可以发现 `Successfully uninstalled numpy-xxx`【默认版本】 以及 `Successfully installed numpy-1.20.3`【指定版本】
+2、测试时发现如果在新建虚拟环境的时候指定了 python 版本的话应该是没有这个问题的，猜测是因为会在虚拟环境中安装好 pip ，而我们这里包括 pip 在内啥包也没有装，所以使用的是默认环境的 pip
+3、有个问题，之前我在创建新的虚拟环境时应该指定了 python 版本，但还是使用的默认环境的 pip 包，但是刚才在两台机器上都没有复现成功，于是有了上面的第 2 点
+4、出现了第 3 点的问题后，我当时是使用 `python -m pip install package-name` 解决的，在 pip 前面加上了 python -m。至于为什么，可以参考 [StackOverflow](https://stackoverflow.com/questions/41060382/using-pip-to-install-packages-to-anaconda-environment) 上的回答：
+> 1、如果你有一个非 conda 的 pip 作为你的默认 pip，但是 conda 的 python 是你的默认 python（如下）：
+>
+> ```shell
+> >which -a pip
+> /home/<user>/.local/bin/pip   
+> /home/<user>/.conda/envs/newenv/bin/pip
+> /usr/bin/pip
+> 
+> >which -a python
+> /home/<user>/.conda/envs/newenv/bin/python
+> /usr/bin/python
+> ```
+>
+> 2、然后，而不是直接调用 `pip install <package>`，你可以在 python 中使用模块标志 -m，以便它使用 anaconda python 进行安装
+>
+> ```shell
+>python -m pip install <package>
+> ```
+>
+> 3、这将把包安装到 anaconda 库目录，而不是与（非anaconda） pip 关联的库目录
+> 
+> 4、这样做的原因如下：命令 pip 引用了一个特定的 pip 文件 / 快捷方式（which -a pip 会告诉你是哪一个）。类似地，命令 python 引用一个特定的 python 文件（which -a python 会告诉你是哪个）。由于这样或那样的原因，这两个命令可能变得不同步，因此你的“默认” pip 与你的默认 python 位于不同的文件夹中，因此与不同版本的 python 相关联。
+>
+> 5、与此相反，python -m pip 构造不使用 pip 命令指向的快捷方式。相反，它要求 python 找到它的pip 版本，并使用该版本安装一个包。
+-   
--- a/img/group_qr_code.jpg
+++ b/img/group_qr_code.jpg
--- a/img/ui1.png
+++ b/img/ui1.png
--- a/img/ui2.png
+++ b/img/ui2.png
--- a/knowledge_based_chatglm.py
+++ b/knowledge_based_chatglm.py
-from langchain.chains import RetrievalQA
-from langchain.prompts import PromptTemplate
-from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-from langchain.vectorstores import FAISS
-from langchain.document_loaders import UnstructuredFileLoader
-from chatglm_llm import ChatGLM
-import sentence_transformers
-import torch
-import os
-import readline
-# Global Parameters
-EMBEDDING_MODEL = "text2vec"
-VECTOR_SEARCH_TOP_K = 6
-LLM_MODEL = "chatglm-6b"
-LLM_HISTORY_LEN = 3
-DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
-# Show reply with source text from input document
-REPLY_WITH_SOURCE = True
-embedding_model_dict = {
-    "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
-    "ernie-base": "nghuyong/ernie-3.0-base-zh",
-    "text2vec": "GanymedeNil/text2vec-large-chinese",
-}
-llm_model_dict = {
-    "chatglm-6b-int4-qe": "THUDM/chatglm-6b-int4-qe",
-    "chatglm-6b-int4": "THUDM/chatglm-6b-int4",
-    "chatglm-6b": "THUDM/chatglm-6b",
-}
-def init_cfg(LLM_MODEL, EMBEDDING_MODEL, LLM_HISTORY_LEN, V_SEARCH_TOP_K=6):
-    global chatglm, embeddings, VECTOR_SEARCH_TOP_K
-    VECTOR_SEARCH_TOP_K = V_SEARCH_TOP_K
-    chatglm = ChatGLM()
-    chatglm.load_model(model_name_or_path=llm_model_dict[LLM_MODEL])
-    chatglm.history_len = LLM_HISTORY_LEN
-    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_dict[EMBEDDING_MODEL],)
-    embeddings.client = sentence_transformers.SentenceTransformer(embeddings.model_name,
-                                                                  device=DEVICE)
-def init_knowledge_vector_store(filepath:str):
-    if not os.path.exists(filepath):
-        print("路径不存在")
-        return None
-    elif os.path.isfile(filepath):
-        file = os.path.split(filepath)[-1]
-        try:
-            loader = UnstructuredFileLoader(filepath, mode="elements")
-            docs = loader.load()
-            print(f"{file} 已成功加载")
-        except:
-            print(f"{file} 未能成功加载")
-            return None
-    elif os.path.isdir(filepath):
-        docs = []
-        for file in os.listdir(filepath):
-            fullfilepath = os.path.join(filepath, file)
-            try:
-                loader = UnstructuredFileLoader(fullfilepath, mode="elements")
-                docs += loader.load()
-                print(f"{file} 已成功加载")
-            except:
-                print(f"{file} 未能成功加载")
-    vector_store = FAISS.from_documents(docs, embeddings)
-    return vector_store
-def get_knowledge_based_answer(query, vector_store, chat_history=[]):
-    global chatglm, embeddings
-    prompt_template = """基于以下已知信息，简洁和专业的来回答用户的问题。
-如果无法从中得到答案，请说 "根据已知信息无法回答该问题" 或 "没有提供足够的相关信息"，不允许在答案中添加编造成分，答案请使用中文。
-已知内容:
-{context}
-问题:
-{question}"""
-    prompt = PromptTemplate(
-        template=prompt_template,
-        input_variables=["context", "question"]
-    )
-    chatglm.history = chat_history
-    knowledge_chain = RetrievalQA.from_llm(
-        llm=chatglm,
-        retriever=vector_store.as_retriever(search_kwargs={"k": VECTOR_SEARCH_TOP_K}),
-        prompt=prompt
-    )
-    knowledge_chain.combine_documents_chain.document_prompt = PromptTemplate(
-            input_variables=["page_content"], template="{page_content}"
-        )
-    knowledge_chain.return_source_documents = True
-    result = knowledge_chain({"query": query})
-    chatglm.history[-1][0] = query
-    return result, chatglm.history
-if __name__ == "__main__":
-    init_cfg(LLM_MODEL, EMBEDDING_MODEL, LLM_HISTORY_LEN)
-    vector_store = None
-    while not vector_store:
-        filepath = input("Input your local knowledge file path 请输入本地知识文件路径：")
-        vector_store = init_knowledge_vector_store(filepath)
-    history = []
-    while True:
-        query = input("Input your question 请输入问题：")
-        resp, history = get_knowledge_based_answer(query=query,
-                                                   vector_store=vector_store,
-                                                   chat_history=history)
-        if REPLY_WITH_SOURCE:
-            print(resp)
-        else:
-            print(resp["result"])
--- a/models/__init__.py
+++ b/models/__init__.py
+from .chatglm_llm import *
\ No newline at end of file
--- a/chatglm_llm.py
+++ b/chatglm_llm.py
@@ -3,8 +3,9 @@ from typing import Optional, List
 from langchain.llms.utils import enforce_stop_tokens
 from transformers import AutoTokenizer, AutoModel
 import torch
+from configs.model_config import LLM_DEVICE
-DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
+DEVICE = LLM_DEVICE
 DEVICE_ID = "0" if torch.cuda.is_available() else None
 CUDA_DEVICE = f"{DEVICE}:{DEVICE_ID}" if DEVICE_ID else DEVICE
@@ -48,12 +49,14 @@ class ChatGLM(LLM):
        self.history = self.history+[[None, response]]
        return response
-    def load_model(self, model_name_or_path: str = "THUDM/chatglm-6b"):
+    def load_model(self,
+                   model_name_or_path: str = "THUDM/chatglm-6b",
+                   llm_device=LLM_DEVICE):
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name_or_path,
            trust_remote_code=True
        )
-        if torch.cuda.is_available():
+        if torch.cuda.is_available() and llm_device.lower().startswith("cuda"):
            self.model = (
                AutoModel.from_pretrained(
                    model_name_or_path,
@@ -61,19 +64,12 @@ class ChatGLM(LLM):
                .half()
                .cuda()
            )
-        elif torch.backends.mps.is_available():
-            self.model = (
-                AutoModel.from_pretrained(
-                    model_name_or_path,
-                    trust_remote_code=True)
-                .float()
-                .to('mps')
-            )
        else:
            self.model = (
                AutoModel.from_pretrained(
                    model_name_or_path,
                    trust_remote_code=True)
                .float()
+                .to(llm_device)
            )
        self.model = self.model.eval()
--- a/pdf_requirements.txt
+++ b/pdf_requirements.txt
-langchain>=0.0.124
-transformers==4.27.1
-unstructured[local-inference]
-layoutparser[layoutmodels,tesseract]
-nltk
-sentence-transformers
-beautifulsoup4
-icetk
-cpm_kernels
-faiss-cpu
-gradio>=3.25.0
-detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2
\ No newline at end of file
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,5 @@ beautifulsoup4
 icetk
 cpm_kernels
 faiss-cpu
 gradio>=3.25.0
\ No newline at end of file
+detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2
\ No newline at end of file
--- a/webui.py
+++ b/webui.py
 import gradio as gr
 import os
 import shutil
-import knowledge_based_chatglm as kb
+from chains.local_doc_qa import LocalDocQA
+from configs.model_config import *
 def get_file_list():
@@ -12,9 +13,11 @@ def get_file_list():
 file_list = get_file_list()
-embedding_model_dict_list = list(kb.embedding_model_dict.keys())
+embedding_model_dict_list = list(embedding_model_dict.keys())
-llm_model_dict_list = list(kb.llm_model_dict.keys())
+llm_model_dict_list = list(llm_model_dict.keys())
+local_doc_qa = LocalDocQA()
 def upload_file(file):
@@ -27,22 +30,56 @@ def upload_file(file):
    return gr.Dropdown.update(choices=file_list, value=filename)
-def get_answer(query, vector_store, history):
+def get_answer(query, vs_path, history):
+    """Answer ``query`` against the loaded knowledge base and update the chat.
+
+    When ``vs_path`` is truthy the question is passed to
+    ``local_doc_qa.get_knowledge_based_answer`` together with the current
+    history; otherwise a message asking the user to load a file first is
+    appended to the history instead.
+
+    Returns:
+        ``(history, "")``. The second element is an empty string; the
+        ``query.submit`` wiring in this file routes it to the query textbox.
+    """
-    resp, history = kb.get_knowledge_based_answer(
+    if vs_path:
-        query=query, vector_store=vector_store, chat_history=history)
+        resp, history = local_doc_qa.get_knowledge_based_answer(
-    return history, history
+            query=query, vs_path=vs_path, chat_history=history)
+    else:
+        # No vector store yet: answer with a hint rather than calling the chain.
+        history = history + [[None, "请先加载文件后，再进行提问。"]]
+    return history, ""
+def update_status(history, status):
+    """Return ``history`` extended by a ``[None, status]`` entry and echo ``status`` to stdout.
+
+    The result is built with ``+``, so a list passed as ``history`` is left unmodified.
+    """
+    extended = history + [[None, status]]
+    print(status)
+    return extended
+def init_model():
+    """Load the models with their default configuration and describe the outcome.
+
+    Calls ``local_doc_qa.init_cfg()`` without arguments.
+
+    Returns:
+        A status message (str) saying whether loading succeeded; it never raises
+        for ordinary errors, so a failure is reported as a message instead.
+    """
+    try:
+        local_doc_qa.init_cfg()
+    except Exception as e:
+        # Catch Exception rather than using a bare ``except`` so Ctrl-C / SystemExit
+        # still stop the process, and print the cause so it is not silently lost.
+        print(e)
+        return """模型未成功加载，请重新选择后点击"加载模型"按钮"""
+    return """模型已成功加载，请选择文件后点击"加载文件"按钮"""
-def get_model_status(history):
-    return history + [[None, "模型已完成加载，请选择要加载的文档"]]
+def reinit_model(llm_model, embedding_model, llm_history_len, top_k, history):
+    """Reload the models with the given settings and report the result in the chat.
+
+    Args:
+        llm_model: Forwarded to ``local_doc_qa.init_cfg`` as ``llm_model``.
+        embedding_model: Forwarded as ``embedding_model``.
+        llm_history_len: Forwarded as ``llm_history_len``.
+        top_k: Forwarded as ``top_k``.
+        history: Current chatbot message list.
+
+    Returns:
+        ``history`` with one ``[None, status]`` entry appended that says whether
+        reloading succeeded.
+    """
+    try:
+        local_doc_qa.init_cfg(llm_model=llm_model,
+                              embedding_model=embedding_model,
+                              llm_history_len=llm_history_len,
+                              top_k=top_k)
+    except Exception as e:
+        # Catch Exception rather than using a bare ``except`` so Ctrl-C / SystemExit
+        # still propagate, and print the cause so the failure can be diagnosed.
+        print(e)
+        model_status = """模型未成功重新加载，请重新选择后点击"加载模型"按钮"""
+    else:
+        model_status = """模型已成功重新加载，请选择文件后点击"加载文件"按钮"""
+    return history + [[None, model_status]]
-def get_file_status(history):
-    return history + [[None, "文档已完成加载，请开始提问"]]
-with gr.Blocks(css="""
+def get_vector_store(filepath, history):
-.importantButton {
+    """Index the selected file and report the outcome in the chat.
+
+    Args:
+        filepath: Name of the selected file; it is looked up under ``content/``.
+            May be falsy when no file is selected.
+        history: Current chatbot message list.
+
+    Returns:
+        ``(vs_path, history)``: ``vs_path`` is the value returned by
+        ``local_doc_qa.init_knowledge_vector_store`` (``None`` when the models
+        are not loaded or no file was given), and ``history`` has one
+        ``[None, status]`` entry appended.
+    """
+    # Both the LLM and the embedding model must be loaded before indexing.
+    # NOTE(review): assumes LocalDocQA exposes the embedding model as
+    # ``embeddings`` - confirm against chains/local_doc_qa.py.
+    if not (local_doc_qa.llm and local_doc_qa.embeddings):
+        return None, history + [[None, "模型未完成加载，请先在加载模型后再导入文件"]]
+    # With no file selected, skip the call: "content/" + None would raise TypeError.
+    if filepath:
+        vs_path = local_doc_qa.init_knowledge_vector_store(["content/" + filepath])
+    else:
+        vs_path = None
+    if vs_path:
+        file_status = "文件已成功加载，请开始提问"
+    else:
+        file_status = "文件未成功加载，请重新上传文件"
+    return vs_path, history + [[None, file_status]]
+block_css = """.importantButton {
    background: linear-gradient(45deg, #7e0570,#5d1c99, #6e00ff) !important;
    border: none !important;
 }
@@ -50,102 +87,87 @@ with gr.Blocks(css="""
 .importantButton:hover {
    background: linear-gradient(45deg, #ff00e0,#8500ff, #6e00ff) !important;
    border: none !important;
-}
+}"""
-""") as demo:
+webui_title = """
-    gr.Markdown(
-        f"""
 # 🎉langchain-ChatGLM WebUI🎉
 👍 [https://github.com/imClumsyPanda/langchain-ChatGLM](https://github.com/imClumsyPanda/langchain-ChatGLM)
-""")
+"""
-    with gr.Row():
-        with gr.Column(scale=2):
+init_message = """欢迎使用 langchain-ChatGLM Web UI，开始提问前，请依次如下 3 个步骤：
-            chatbot = gr.Chatbot([[None, """欢迎使用 langchain-ChatGLM Web UI，开始提问前，请依次如下 3 个步骤：
+1. 选择语言模型、Embedding 模型及相关参数后点击"重新加载模型"，并等待加载完成提示
-1. 选择语言模型、Embedding 模型及相关参数后点击"step.1: setting"，并等待加载完成提示
+2. 上传或选择已有文件作为本地知识文档输入后点击"重新加载文档"，并等待加载完成提示
-2. 上传或选择已有文件作为本地知识文档输入后点击"step.2 loading"，并等待加载完成提示
+3. 输入要提交的问题后，点击回车提交 """
-3. 输入要提交的问题后点击"step.3 asking" """]],
-                                 elem_id="chat-box",
-                                 show_label=False).style(height=600)
-        with gr.Column(scale=1):
-            with gr.Column():
-                llm_model = gr.Radio(llm_model_dict_list,
-                                     label="llm model",
-                                     value="chatglm-6b",
-                                     interactive=True)
-                LLM_HISTORY_LEN = gr.Slider(0,
-                                            10,
-                                            value=3,
-                                            step=1,
-                                            label="LLM history len",
-                                            interactive=True)
-                embedding_model = gr.Radio(embedding_model_dict_list,
-                                           label="embedding model",
-                                           value="text2vec",
-                                           interactive=True)
-                VECTOR_SEARCH_TOP_K = gr.Slider(1,
-                                                20,
-                                                value=6,
-                                                step=1,
-                                                label="vector search top k",
-                                                interactive=True)
-                load_model_button = gr.Button("step.1：setting")
-                load_model_button.click(lambda *args:
-                                        kb.init_cfg(args[0], args[1], args[2], args[3]),
-                                        show_progress=True,
-                                        api_name="init_cfg",
-                                        inputs=[llm_model, embedding_model, LLM_HISTORY_LEN,VECTOR_SEARCH_TOP_K]
-                                        ).then(
-                    get_model_status, chatbot, chatbot
-                )
-            with gr.Column():
-                with gr.Tab("select"):
-                    selectFile = gr.Dropdown(file_list,
-                                             label="content file",
-                                             interactive=True,
-                                             value=file_list[0] if len(file_list) > 0 else None)
-                with gr.Tab("upload"):
-                    file = gr.File(label="content file",
-                                   file_types=['.txt', '.md', '.docx']
-                                   ).style(height=100)
-                    # 将上传的文件保存到content文件夹下,并更新下拉框
-                    file.upload(upload_file,
-                                inputs=file,
-                                outputs=selectFile)
-                history = gr.State([])
-                vector_store = gr.State()
-                load_button = gr.Button("step.2：loading")
-                load_button.click(lambda fileName:
-                                  kb.init_knowledge_vector_store(
-                                      "content/" + fileName),
-                                  show_progress=True,
-                                  api_name="init_knowledge_vector_store",
-                                  inputs=selectFile,
-                                  outputs=vector_store
-                                  ).then(
-                    get_file_status,
-                    chatbot,
-                    chatbot,
-                    show_progress=True,
-                )
+model_status = init_model()
+with gr.Blocks(css=block_css) as demo:
+    vs_path, file_status, model_status = gr.State(""), gr.State(""), gr.State(model_status)
+    gr.Markdown(webui_title)
    with gr.Row():
        with gr.Column(scale=2):
+            chatbot = gr.Chatbot([[None, init_message], [None, model_status.value]],
+                                 elem_id="chat-box",
+                                 show_label=False).style(height=750)
            query = gr.Textbox(show_label=False,
-                               placeholder="Prompts",
+                               placeholder="请输入提问内容，按回车进行提交",
-                               lines=1,
-                               value="用200字总结一下"
                               ).style(container=False)
        with gr.Column(scale=1):
-            generate_button = gr.Button("step.3：asking",
+            llm_model = gr.Radio(llm_model_dict_list,
-                                        elem_classes="importantButton")
+                                 label="LLM 模型",
-            generate_button.click(get_answer,
+                                 value=LLM_MODEL,
-                                  [query, vector_store, chatbot],
+                                 interactive=True)
-                                  [chatbot, history],
+            llm_history_len = gr.Slider(0,
-                                  api_name="get_knowledge_based_answer"
+                                        10,
-                                  )
+                                        value=3,
+                                        step=1,
+                                        label="LLM history len",
+                                        interactive=True)
+            embedding_model = gr.Radio(embedding_model_dict_list,
+                                       label="Embedding 模型",
+                                       value=EMBEDDING_MODEL,
+                                       interactive=True)
+            top_k = gr.Slider(1,
+                              20,
+                              value=6,
+                              step=1,
+                              label="向量匹配 top k",
+                              interactive=True)
+            load_model_button = gr.Button("重新加载模型")
+            # with gr.Column():
+            with gr.Tab("select"):
+                selectFile = gr.Dropdown(file_list,
+                                         label="content file",
+                                         interactive=True,
+                                         value=file_list[0] if len(file_list) > 0 else None)
+            with gr.Tab("upload"):
+                file = gr.File(label="content file",
+                               file_types=['.txt', '.md', '.docx', '.pdf']
+                               )  # .style(height=100)
+            load_file_button = gr.Button("加载文件")
+    load_model_button.click(reinit_model,
+                            show_progress=True,
+                            inputs=[llm_model, embedding_model, llm_history_len, top_k, chatbot],
+                            outputs=chatbot
+                            )
+    # 将上传的文件保存到content文件夹下,并更新下拉框
+    file.upload(upload_file,
+                inputs=file,
+                outputs=selectFile)
+    load_file_button.click(get_vector_store,
+                           show_progress=True,
+                           inputs=[selectFile, chatbot],
+                           outputs=[vs_path, chatbot],
+                           )
+    query.submit(get_answer,
+                 [query, vs_path, chatbot],
+                 [chatbot, query],
+                 )
 demo.queue(concurrency_count=3).launch(
    server_name='0.0.0.0', share=False, inbrowser=False)