Commit 0e8cc0d1 by imClumsyPanda

add streaming option in configs/model_config.py

Parent 2ebcd136
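In effect, the commit turns streaming from a per-call keyword argument of get_knowledge_based_answer into a module-level STREAMING constant that init_cfg threads onto the ChatGLM instance. A minimal sketch of the resulting call flow, using names from the diff below (the chains.local_doc_qa module path and the vs_path value are placeholders, not verified against the repo):

```python
from configs.model_config import STREAMING
from chains.local_doc_qa import LocalDocQA  # module path assumed from the repo layout

local_doc_qa = LocalDocQA()
local_doc_qa.init_cfg(streaming=STREAMING)  # stored on the model as self.llm.streaming

vs_path = "/path/to/vector_store"  # placeholder: a previously built FAISS store

if local_doc_qa.llm.streaming:
    # streaming: the method yields successive (response, history) pairs
    for resp, history in local_doc_qa.get_knowledge_based_answer(
            query="你好", vs_path=vs_path, chat_history=[]):
        print(history[-1][-1])  # partial answer accumulated in the chat history
else:
    # non-streaming: a single (response, history) pair comes back, with
    # resp["source_documents"] carrying the retrieved chunks (per the webui diff)
    resp, history = local_doc_qa.get_knowledge_based_answer(
        query="你好", vs_path=vs_path, chat_history=[])
    print(resp["source_documents"])
```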
@@ -116,10 +116,12 @@ class LocalDocQA:
                  llm_history_len: int = LLM_HISTORY_LEN,
                  llm_model: str = LLM_MODEL,
                  llm_device=LLM_DEVICE,
+                 streaming=STREAMING,
                  top_k=VECTOR_SEARCH_TOP_K,
                  use_ptuning_v2: bool = USE_PTUNING_V2
                  ):
         self.llm = ChatGLM()
+        self.llm.streaming = streaming
         self.llm.load_model(model_name_or_path=llm_model_dict[llm_model],
                             llm_device=llm_device,
                             use_ptuning_v2=use_ptuning_v2)
@@ -186,9 +188,7 @@ class LocalDocQA:
     def get_knowledge_based_answer(self,
                                    query,
                                    vs_path,
-                                   chat_history=[],
-                                   streaming=True):
-        self.llm.streaming = streaming
+                                   chat_history=[]):
         vector_store = FAISS.load_local(vs_path, self.embeddings)
         FAISS.similarity_search_with_score_by_vector = similarity_search_with_score_by_vector
         vector_store.chunk_size=self.chunk_size
@@ -197,7 +197,7 @@ class LocalDocQA:
         related_docs = get_docs_with_score(related_docs_with_score)
         prompt = generate_prompt(related_docs, query)
-        if streaming:
+        if self.llm.streaming:
             for result, history in self.llm._call(prompt=prompt,
                                                   history=chat_history):
                 history[-1][0] = query
...
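Both branches above assume ChatGLM._call changes shape with the flag: an iterable of (result, history) pairs when streaming, a single pair otherwise. The wrapper itself is not part of this diff; a minimal sketch of one way to meet that contract (stream_chat and chat are the actual chatglm-6b Hugging Face methods; everything else here is an assumption, not the repo's code):

```python
class ChatGLM:
    """Sketch of an LLM wrapper with a streaming toggle (illustrative only)."""
    streaming: bool = True

    def _call(self, prompt, history=None):
        history = history or []
        if self.streaming:
            # hand back a generator object; the caller iterates partial replies
            return self._stream(prompt, history)
        # blocking call: chatglm-6b's chat() returns (response, history)
        response, history = self.model.chat(self.tokenizer, prompt, history=history)
        return response, history

    def _stream(self, prompt, history):
        # chatglm-6b's stream_chat() yields progressively longer (response, history)
        for response, history in self.model.stream_chat(self.tokenizer, prompt, history=history):
            yield response, history
```

Keeping `yield` out of `_call` itself is what lets it return either a generator object or a plain tuple, matching how the webui consumes it in both branches.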
@@ -27,6 +27,9 @@ llm_model_dict = {
 
 # LLM model name
 LLM_MODEL = "chatglm-6b"
 
+# LLM streaming response
+STREAMING = True
+
 # Use p-tuning-v2 PrefixEncoder
 USE_PTUNING_V2 = False
...
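With the switch living in configuration rather than at call sites, disabling streaming becomes a one-line edit:

```python
# configs/model_config.py
STREAMING = False  # UI falls back to blocking, whole-response calls
```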
@@ -30,8 +30,8 @@ local_doc_qa = LocalDocQA()
 def get_answer(query, vs_path, history, mode):
-    if mode == "知识库问答":
-        if vs_path:
+    if mode == "知识库问答" and vs_path:
+        if local_doc_qa.llm.streaming:
             for resp, history in local_doc_qa.get_knowledge_based_answer(
                     query=query, vs_path=vs_path, chat_history=history):
                 source = "\n\n"
@@ -44,14 +44,28 @@ def get_answer(query, vs_path, history, mode):
                 history[-1][-1] += source
                 yield history, ""
         else:
+            resp, history = local_doc_qa.get_knowledge_based_answer(
+                query=query, vs_path=vs_path, chat_history=history)
+            source = "\n\n"
+            source += "".join(
+                [f"""<details> <summary>出处 [{i + 1}] {os.path.split(doc.metadata["source"])[-1]}</summary>\n"""
+                 f"""{doc.page_content}\n"""
+                 f"""</details>"""
+                 for i, doc in
+                 enumerate(resp["source_documents"])])
+            history[-1][-1] += source
+            return history, ""
+    else:
+        if local_doc_qa.llm.streaming:
             for resp, history in local_doc_qa.llm._call(query, history):
                 history[-1][-1] = resp + (
                     "\n\n当前知识库为空,如需基于知识库进行问答,请先加载知识库后,再进行提问。" if mode == "知识库问答" else "")
                 yield history, ""
-    else:
-        for resp, history in local_doc_qa.llm._call(query, history):
-            history[-1][-1] = resp
-            yield history, ""
+        else:
+            resp, history = local_doc_qa.llm._call(query, history)
+            history[-1][-1] = resp + (
+                "\n\n当前知识库为空,如需基于知识库进行问答,请先加载知识库后,再进行提问。" if mode == "知识库问答" else "")
+            return history, ""
 
 
 def update_status(history, status):
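One Python subtlety in the new get_answer: because its streaming branches contain yield, the whole function compiles as a generator, so the `return history, ""` statements in the non-streaming branches surface only as a StopIteration value rather than as an ordinary return value. A sketch of the usual workaround, shown on the plain-chat branch (illustrative, not what this commit ships):

```python
# pattern sketch: make every branch of a generator function yield,
# so Gradio (or any caller) can always iterate the result
def get_answer_sketch(query, history):
    if local_doc_qa.llm.streaming:
        for resp, history in local_doc_qa.llm._call(query, history):
            history[-1][-1] = resp
            yield history, ""
    else:
        resp, history = local_doc_qa.llm._call(query, history)
        history[-1][-1] = resp
        yield history, ""  # a single yield instead of `return history, ""`
```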
@@ -62,7 +76,7 @@ def update_status(history, status):
 def init_model():
     try:
-        local_doc_qa.init_cfg()
+        local_doc_qa.init_cfg(streaming=STREAMING)
         local_doc_qa.llm._call("你好")
         reply = """模型已成功加载,可以开始对话,或从右侧选择模式后开始对话"""
         print(reply)
@@ -84,7 +98,8 @@ def reinit_model(llm_model, embedding_model, llm_history_len, use_ptuning_v2, to
                               embedding_model=embedding_model,
                               llm_history_len=llm_history_len,
                               use_ptuning_v2=use_ptuning_v2,
-                              top_k=top_k)
+                              top_k=top_k,
+                              streaming=STREAMING)
         model_status = """模型已成功重新加载,可以开始对话,或从右侧选择模式后开始对话"""
         print(model_status)
     except Exception as e:
...
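Taken together, a quick smoke test of both modes after this change (module paths assumed from the repo layout):

```python
from configs.model_config import STREAMING
from chains.local_doc_qa import LocalDocQA  # assumed module path

qa = LocalDocQA()
qa.init_cfg(streaming=STREAMING)

qa.llm.streaming = False
resp, history = qa.llm._call("你好", [])       # blocking: one (response, history) pair
print(resp)

qa.llm.streaming = True
for resp, history in qa.llm._call("你好", []):  # streaming: successive partial responses
    print(resp)
```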