提交 12ee17f3 作者: imClumsyPanda

use RetrievalQA instead of ChatVectorDBChain

上级 2240ed1e
...@@ -16,28 +16,14 @@ def torch_gc(): ...@@ -16,28 +16,14 @@ def torch_gc():
torch.cuda.ipc_collect() torch.cuda.ipc_collect()
tokenizer = AutoTokenizer.from_pretrained(
"/Users/liuqian/Downloads/ChatGLM-6B/chatglm_hf_model",
# "THUDM/chatglm-6b",
trust_remote_code=True
)
model = (
AutoModel.from_pretrained(
"/Users/liuqian/Downloads/ChatGLM-6B/chatglm_hf_model",
# "THUDM/chatglm-6b",
trust_remote_code=True)
.float()
.to("mps")
# .half()
# .cuda()
)
class ChatGLM(LLM): class ChatGLM(LLM):
max_token: int = 10000 max_token: int = 10000
temperature: float = 0.1 temperature: float = 0.1
top_p = 0.9 top_p = 0.9
history = [] history = []
tokenizer: object = None
model: object = None
history_len: int = 10
def __init__(self): def __init__(self):
super().__init__() super().__init__()
...@@ -49,31 +35,29 @@ class ChatGLM(LLM): ...@@ -49,31 +35,29 @@ class ChatGLM(LLM):
def _call(self, def _call(self,
prompt: str, prompt: str,
stop: Optional[List[str]] = None) -> str: stop: Optional[List[str]] = None) -> str:
response, updated_history = model.chat( response, _ = self.model.chat(
tokenizer, self.tokenizer,
prompt, prompt,
history=self.history, history=self.history[-self.history_len:],
max_length=self.max_token, max_length=self.max_token,
temperature=self.temperature, temperature=self.temperature,
) )
torch_gc() torch_gc()
print("history: ", self.history)
if stop is not None: if stop is not None:
response = enforce_stop_tokens(response, stop) response = enforce_stop_tokens(response, stop)
self.history = updated_history self.history = self.history+[[None, response]]
return response return response
def get_num_tokens(self, text: str) -> int: def load_model(self,
tokenized_text = tokenizer.tokenize(text) model_name_or_path: str = "THUDM/chatglm-6b"):
return len(tokenized_text) self.tokenizer = AutoTokenizer.from_pretrained(
model_name_or_path,
if __name__ == "__main__": trust_remote_code=True
history = [] )
while True: self.model = (
query = input("Input your question 请输入问题:") AutoModel.from_pretrained(
resp, history = model.chat(tokenizer, model_name_or_path,
query, trust_remote_code=True)
history=history, .half()
temperature=0.01, .cuda()
max_length=100000) )
print(resp)
\ No newline at end of file
from langchain.prompts.prompt import PromptTemplate from langchain.chains import RetrievalQA
from langchain.chains import ChatVectorDBChain, ConversationalRetrievalChain
from langchain.prompts.chat import ( from langchain.prompts.chat import (
ChatPromptTemplate, ChatPromptTemplate,
SystemMessagePromptTemplate, SystemMessagePromptTemplate,
...@@ -10,19 +9,34 @@ from langchain.vectorstores import FAISS ...@@ -10,19 +9,34 @@ from langchain.vectorstores import FAISS
from langchain.document_loaders import UnstructuredFileLoader from langchain.document_loaders import UnstructuredFileLoader
from chatglm_llm import ChatGLM from chatglm_llm import ChatGLM
# Global Parameters
EMBEDDING_MODEL = "text2vec"
VECTOR_SEARCH_TOP_K = 6
LLM_MODEL = "chatglm-6b"
LLM_HISTORY_LEN = 3
# Show reply with source text from input document
REPLY_WITH_SOURCE = True
embedding_model_dict = { embedding_model_dict = {
"ernie-tiny": "nghuyong/ernie-3.0-nano-zh", "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
"ernie-base": "nghuyong/ernie-3.0-base-zh", "ernie-base": "nghuyong/ernie-3.0-base-zh",
"text2vec": "/Users/liuqian/Downloads/ChatGLM-6B/chatglm_embedding"#"GanymedeNil/text2vec-large-chinese" "text2vec": "GanymedeNil/text2vec-large-chinese",
} }
llm_model_dict = {
"chatglm-6b": "THUDM/chatglm-6b",
"chatglm-6b-int4": "THUDM/chatglm-6b-int4",
"chatglm-6b-int4-qe":"THUDM/chatglm-6b-int4-qe",
}
chatglm = ChatGLM() chatglm = ChatGLM()
chatglm.load_model(model_name_or_path=llm_model_dict[LLM_MODEL])
chatglm.history_len = LLM_HISTORY_LEN
def init_knowledge_vector_store(filepath): def init_knowledge_vector_store(filepath):
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_dict["text2vec"], ) embeddings = HuggingFaceEmbeddings(model_name=embedding_model_dict[EMBEDDING_MODEL], )
loader = UnstructuredFileLoader(filepath, mode="elements") loader = UnstructuredFileLoader(filepath, mode="elements")
docs = loader.load() docs = loader.load()
...@@ -43,28 +57,17 @@ def get_knowledge_based_answer(query, vector_store, chat_history=[]): ...@@ -43,28 +57,17 @@ def get_knowledge_based_answer(query, vector_store, chat_history=[]):
] ]
prompt = ChatPromptTemplate.from_messages(messages) prompt = ChatPromptTemplate.from_messages(messages)
condese_propmt_template = """任务: 给一段对话和一个后续问题,将后续问题改写成一个独立的问题。确保问题是完整的,没有模糊的指代。
----------------
聊天记录:
{chat_history}
----------------
后续问题:{question}
----------------
改写后的独立、完整的问题:"""
new_question_prompt = PromptTemplate.from_template(condese_propmt_template)
chatglm.history = chat_history chatglm.history = chat_history
knowledge_chain = ConversationalRetrievalChain.from_llm( knowledge_chain = RetrievalQA.from_llm(
llm=chatglm, llm=chatglm,
retriever=vector_store.as_retriever(), retriever=vector_store.as_retriever(search_kwargs={"k": VECTOR_SEARCH_TOP_K}),
qa_prompt=prompt, prompt=prompt
condense_question_prompt=new_question_prompt,
) )
knowledge_chain.return_source_documents = True knowledge_chain.return_source_documents = True
# knowledge_chain.top_k_docs_for_context = 10
knowledge_chain.max_tokens_limit = 10000
result = knowledge_chain({"question": query, "chat_history": chat_history}) result = knowledge_chain({"query": query})
chatglm.history[-1][0] = query
return result, chatglm.history return result, chatglm.history
...@@ -77,4 +80,7 @@ if __name__ == "__main__": ...@@ -77,4 +80,7 @@ if __name__ == "__main__":
resp, history = get_knowledge_based_answer(query=query, resp, history = get_knowledge_based_answer(query=query,
vector_store=vector_store, vector_store=vector_store,
chat_history=history) chat_history=history)
if REPLY_WITH_SOURCE:
print(resp) print(resp)
else:
print(resp["result"])
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论