提交 b352c29d 作者: glide-the

llm

上级 218aca2e
{
{
......@@ -10,12 +10,12 @@
"name": "stderr",
"output_type": "stream",
"text": [
"INFO 2023-06-01 20:26:48,576-1d: \n",
"INFO 2023-06-09 20:52:01,296-1d: \n",
"loading model config\n",
"llm device: cuda\n",
"embedding device: cuda\n",
"dir: /media/gpt4-pdf-chatbot-langchain/dev-langchain-ChatGLM\n",
"flagging username: 7daba79785044bceb6896b9e6f8f9894\n",
"flagging username: 35d96e513c5347dbb0c1d7c2fb21cbd4\n",
"\n"
]
}
......@@ -42,7 +42,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"id": "68978c38-c0e9-4ae9-ba90-9c02aca335be",
"metadata": {},
"outputs": [
......@@ -50,7 +50,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Loading vicuna-7b-hf...\n"
"Loading vicuna-13b-hf...\n"
]
},
{
......@@ -84,12 +84,12 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9b61d05e18044b009c72b862c84ab5cb",
"model_id": "9df1856e06d1460683851a0b73537a6d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
"Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]"
]
},
"metadata": {},
......@@ -99,7 +99,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Loaded the model in 6.39 seconds.\n"
"Loaded the model in 11.22 seconds.\n"
]
}
],
......@@ -110,19 +110,18 @@
"from langchain.agents import initialize_agent, Tool\n",
"from langchain.agents import AgentType\n",
" \n",
"args = parser.parse_args(args=['--model-dir', '/media/checkpoint/', '--model', 'vicuna-7b-hf', '--no-remote-model', '--load-in-8bit'])\n",
"args = parser.parse_args(args=['--model', 'vicuna-13b-hf', '--no-remote-model', '--load-in-8bit'])\n",
"\n",
"args_dict = vars(args)\n",
"\n",
"shared.loaderCheckPoint = LoaderCheckPoint(args_dict)\n",
"torch.cuda.empty_cache()\n",
"shared.loaderCheckPoint.unload_model()\n",
"shared.loaderCheckPoint.reload_model() \n"
"llm=shared.loaderLLM() \n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 3,
"id": "c8e4a58d-1a3a-484a-8417-bcec0eb7170e",
"metadata": {},
"outputs": [
......@@ -130,7 +129,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'action': 'State of Dialogue History System', 'action_input': '露ᥫᩣ,'}\n"
"{'action': 'summary', 'action_input': '露ᥫᩣ,'}\n"
]
}
],
......@@ -188,10 +187,178 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"id": "a55f92ce-4ebf-4cb3-8e16-780c14b6517f",
"metadata": {},
"outputs": [],
"source": [
"from langchain.tools import StructuredTool\n",
"\n",
"def multiplier(a: float, b: float) -> float:\n",
" \"\"\"Multiply the provided floats.\"\"\"\n",
" return a * b\n",
"\n",
"tool = StructuredTool.from_function(multiplier)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "e089a828-b662-4d9a-8d88-4bf95ccadbab",
"metadata": {},
"outputs": [],
"source": [
"from langchain import OpenAI\n",
"from langchain.agents import initialize_agent, AgentType\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d4ea7f0e-1ba9-4f40-82ec-7c453bd64945",
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"# Structured tools are compatible with the STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION agent type. \n",
"agent_executor = initialize_agent([tool], llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "640bfdfb-41e7-4429-9718-8fa724de12b7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"__call:System: Respond to the human as helpfully and accurately as possible. You have access to the following tools:\n",
"\n",
"multiplier: multiplier(a: float, b: float) -> float - Multiply the provided floats., args: {{'a': {{'title': 'A', 'type': 'number'}}, 'b': {{'title': 'B', 'type': 'number'}}}}\n",
"\n",
"Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).\n",
"\n",
"Valid \"action\" values: \"Final Answer\" or multiplier\n",
"\n",
"Provide only ONE action per $JSON_BLOB, as shown:\n",
"\n",
"```\n",
"{\n",
" \"action\": $TOOL_NAME,\n",
" \"action_input\": $INPUT\n",
"}\n",
"```\n",
"\n",
"Follow this format:\n",
"\n",
"Question: input question to answer\n",
"Thought: consider previous and subsequent steps\n",
"Action:\n",
"```\n",
"$JSON_BLOB\n",
"```\n",
"Observation: action result\n",
"... (repeat Thought/Action/Observation N times)\n",
"Thought: I know what to respond\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"Final Answer\",\n",
" \"action_input\": \"Final response to human\"\n",
"}\n",
"```\n",
"\n",
"Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:.\n",
"Thought:\n",
"Human: What is 1 times 14\n",
"\n",
"\n",
"response:System: ```{\"action\":\"multiplier\",\"action_input\":{\"a\":1,\"b\":14}}``\n",
"\n",
"Observation:\n",
"\u001b[32;1m\u001b[1;3mSystem: ```{\"action\":\"multiplier\",\"action_input\":{\"a\":1,\"b\":14}}``\n",
"\n",
"Observation:\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m14.0\u001b[0m\n",
"Thought:__call:System: Respond to the human as helpfully and accurately as possible. You have access to the following tools:\n",
"\n",
"multiplier: multiplier(a: float, b: float) -> float - Multiply the provided floats., args: {{'a': {{'title': 'A', 'type': 'number'}}, 'b': {{'title': 'B', 'type': 'number'}}}}\n",
"\n",
"Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).\n",
"\n",
"Valid \"action\" values: \"Final Answer\" or multiplier\n",
"\n",
"Provide only ONE action per $JSON_BLOB, as shown:\n",
"\n",
"```\n",
"{\n",
" \"action\": $TOOL_NAME,\n",
" \"action_input\": $INPUT\n",
"}\n",
"```\n",
"\n",
"Follow this format:\n",
"\n",
"Question: input question to answer\n",
"Thought: consider previous and subsequent steps\n",
"Action:\n",
"```\n",
"$JSON_BLOB\n",
"```\n",
"Observation: action result\n",
"... (repeat Thought/Action/Observation N times)\n",
"Thought: I know what to respond\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"Final Answer\",\n",
" \"action_input\": \"Final response to human\"\n",
"}\n",
"```\n",
"\n",
"Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:.\n",
"Thought:\n",
"Human: What is 1 times 14\n",
"\n",
"This was your previous work (but I haven't seen any of it! I only see what you return as final answer):\n",
"System: ```{\"action\":\"multiplier\",\"action_input\":{\"a\":1,\"b\":14}}``\n",
"\n",
"Observation:\n",
"Observation: 14.0\n",
"Thought:\n",
"response:\n",
"\u001b[32;1m\u001b[1;3m\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"''"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent_executor.run(\"What is 1 times 14\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9baa881f-5ff2-4958-b3a2-1653a5e8bc3b",
"metadata": {},
"outputs": [],
"source": []
}
],
......
......@@ -22,7 +22,7 @@ class InvalidScoreLogitsProcessor(LogitsProcessor):
class LLamaLLM(BaseAnswer, LLM, ABC):
checkPoint: LoaderCheckPoint = None
history = []
# history = []
history_len: int = 3
max_new_tokens: int = 500
num_beams: int = 1
......@@ -88,9 +88,16 @@ class LLamaLLM(BaseAnswer, LLM, ABC):
return reply
# 将历史对话数组转换为文本格式
def history_to_text(self, query):
def history_to_text(self, query, history):
"""
历史对话软提示
这段代码首先定义了一个名为 history_to_text 的函数,用于将 self.history
数组转换为所需的文本格式。然后,我们将格式化后的历史文本
再用 self.encode 将其转换为向量表示。最后,将历史对话向量与当前输入的对话向量拼接在一起。
:return:
"""
formatted_history = ''
history = self.history[-self.history_len:] if self.history_len > 0 else []
history = history[-self.history_len:] if self.history_len > 0 else []
for i, (old_query, response) in enumerate(history):
formatted_history += "[Round {}]\n问:{}\n答:{}\n".format(i, old_query, response)
formatted_history += "[Round {}]\n问:{}\n答:".format(len(history), query)
......@@ -116,20 +123,6 @@ class LLamaLLM(BaseAnswer, LLM, ABC):
return input_ids, position_ids, attention_mask
def generate_softprompt_history_tensors(self, query):
    """
    Build the dialogue-history soft prompt for ``query``.

    The visible body only delegates to ``history_to_text`` and returns
    whatever that call returns; no encoding or tensor concatenation is
    performed here.
    NOTE(review): the earlier description mentioned ``self.encode`` and
    joining history vectors with the current input -- that is not done in
    this method; confirm whether it happens elsewhere.

    :param query: the current user query, passed through to ``history_to_text``.
    :return: the result of ``self.history_to_text(query)``.
    """
    # Dialogue content
    # Process the dialogue history
    formatted_history = self.history_to_text(query)
    return formatted_history
@property
def _history_len(self) -> int:
    """Return the value of the ``history_len`` attribute."""
    return self.history_len
......@@ -173,18 +166,18 @@ class LLamaLLM(BaseAnswer, LLM, ABC):
new_tokens = len(output_ids[0]) - len(input_ids[0])
reply = self.decode(output_ids[0][-new_tokens:])
print(f"response:{reply}")
self.history = self.history + [[None, reply]]
print(f"+++++++++++++++++++++++++++++++++++")
return reply
def generatorAnswer(self, prompt: str,
                    history: List[List[str]] = None,
                    streaming: bool = False):
    """
    Answer ``prompt`` in the context of ``history`` and yield one result.

    The text sent to the model is produced by
    ``history_to_text(prompt, history=history)``; generation is delegated
    to ``_call`` with a single stop sequence.

    :param prompt: the current user query.
    :param history: earlier rounds as ``[query, response]`` pairs. ``None``
        (the default) is treated as an empty history.
    :param streaming: accepted for interface compatibility; this method
        does not read it.
    :return: a generator yielding exactly one ``AnswerResult`` whose
        ``history`` is the input history plus ``[None, response]`` and whose
        ``llm_output`` is ``{"answer": response}``.
    """
    # Use a fresh list per call: a literal ``[]`` default is created once and
    # shared by every call, and an explicit ``None`` would break the ``+`` below.
    history = [] if history is None else history

    # TODO: a chat dialogue module and an attention model still need to be
    # implemented. For now ``_call`` is the API extended from langchain's LLM
    # and defaults to a no-prompt mode; to work with the attention model,
    # see the chat_glm implementation.
    softprompt = self.history_to_text(prompt, history=history)
    response = self._call(prompt=softprompt, stop=['\n###'])

    answer_result = AnswerResult()
    # Build a new list rather than mutating the caller's history in place.
    answer_result.history = history + [[None, response]]
    answer_result.llm_output = {"answer": response}
    yield answer_result
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论