Commit 5284fb27  Author: imClumsyPanda

Merge branch 'master' into dev

@@ -181,7 +181,7 @@ async def delete_docs(
     if os.path.exists(doc_path):
         os.remove(doc_path)
     else:
-        return {"code": 1, "msg": f"document {doc_name} not found"}
+        BaseResponse(code=1, msg=f"document {doc_name} not found")
     remain_docs = await list_docs(knowledge_base_id)
     if remain_docs["code"] != 0 or len(remain_docs["data"]) == 0:
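For context, the error branch of delete_docs now builds a BaseResponse instead of returning a raw dict; as shown, the object is constructed without a return, so execution falls through to the remaining-docs check below. A minimal sketch of the response model this call appears to assume (the actual BaseResponse in the repository may carry extra fields or defaults):

from pydantic import BaseModel

class BaseResponse(BaseModel):
    # Sketch only: field names inferred from the call site above.
    code: int = 200        # assumed convention: non-zero signals an error
    msg: str = "success"

# Hypothetical usage mirroring the new error branch:
resp = BaseResponse(code=1, msg="document sample.txt not found")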
@@ -211,24 +211,30 @@ async def local_doc_chat(
 ):
     vs_path = os.path.join(VS_ROOT_PATH, knowledge_base_id)
     if not os.path.exists(vs_path):
-        raise ValueError(f"Knowledge base {knowledge_base_id} not found")
-    for resp, history in local_doc_qa.get_knowledge_based_answer(
-        query=question, vs_path=vs_path, chat_history=history, streaming=True
-    ):
-        pass
-    source_documents = [
-        f"""出处 [{inum + 1}] {os.path.split(doc.metadata['source'])[-1]}:\n\n{doc.page_content}\n\n"""
-        f"""相关度:{doc.metadata['score']}\n\n"""
-        for inum, doc in enumerate(resp["source_documents"])
-    ]
+        # return BaseResponse(code=1, msg=f"Knowledge base {knowledge_base_id} not found")
+        return ChatMessage(
+            question=question,
+            response=f"Knowledge base {knowledge_base_id} not found",
+            history=history,
+            source_documents=[],
+        )
+    else:
+        for resp, history in local_doc_qa.get_knowledge_based_answer(
+            query=question, vs_path=vs_path, chat_history=history, streaming=True
+        ):
+            pass
+        source_documents = [
+            f"""出处 [{inum + 1}] {os.path.split(doc.metadata['source'])[-1]}:\n\n{doc.page_content}\n\n"""
+            f"""相关度:{doc.metadata['score']}\n\n"""
+            for inum, doc in enumerate(resp["source_documents"])
+        ]
     return ChatMessage(
         question=question,
         response=resp["result"],
         history=history,
         source_documents=source_documents,
     )
 async def chat(
......
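Both branches of local_doc_chat now answer with a ChatMessage. A rough sketch of the message model assumed by this hunk (field names taken from the keyword arguments above; the real class in the repository may differ):

from typing import List
from pydantic import BaseModel

class ChatMessage(BaseModel):
    # Sketch only: shapes inferred from how the fields are filled in above.
    question: str
    response: str
    history: List[List[str]]       # accumulated [query, answer] pairs
    source_documents: List[str]    # pre-rendered "出处 [n] ..." strings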
@@ -39,7 +39,11 @@ def load_file(filepath, sentence_size=SENTENCE_SIZE):
 def write_check_file(filepath, docs):
-    fout = open('load_file.txt', 'a')
+    folder_path = os.path.join(os.path.dirname(filepath), "tmp_files")
+    if not os.path.exists(folder_path):
+        os.makedirs(folder_path)
+    fp = os.path.join(folder_path, 'load_file.txt')
+    fout = open(fp, 'a')
     fout.write("filepath=%s,len=%s" % (filepath, len(docs)))
     fout.write('\n')
     for i in docs:
......
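Pieced together, the updated debug helper writes its check log into a tmp_files folder next to the loaded document rather than into the current working directory. A sketch of the function after this hunk (the loop body is truncated in the diff, so it is only hinted at here):

import os

def write_check_file(filepath, docs):
    # Keep the check log beside the source document so repeated loads from
    # different working directories append to the same file.
    folder_path = os.path.join(os.path.dirname(filepath), "tmp_files")
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    fp = os.path.join(folder_path, 'load_file.txt')
    fout = open(fp, 'a')
    fout.write("filepath=%s,len=%s" % (filepath, len(docs)))
    fout.write('\n')
    for i in docs:
        # remainder truncated in the diff; presumably each doc is written
        # out here and fout is closed afterwards
        pass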
@@ -31,13 +31,13 @@ if __name__ == "__main__":
                                                              chat_history=history,
                                                              streaming=STREAMING):
         if STREAMING:
-            logger.info(resp["result"][last_print_len:])
+            print(resp["result"][last_print_len:], end="", flush=True)
             last_print_len = len(resp["result"])
         else:
-            logger.info(resp["result"])
+            print(resp["result"])
     if REPLY_WITH_SOURCE:
         source_text = [f"""出处 [{inum + 1}] {os.path.split(doc.metadata['source'])[-1]}:\n\n{doc.page_content}\n\n"""
                        # f"""相关度:{doc.metadata['score']}\n\n"""
                        for inum, doc in
                        enumerate(resp["source_documents"])]
-        logger.info("\n\n" + "\n\n".join(source_text))
+        print("\n\n" + "\n\n".join(source_text))
@@ -30,7 +30,7 @@ class UnstructuredPaddleImageLoader(UnstructuredFileLoader):
 if __name__ == "__main__":
-    filepath = "../content/samples/test.jpg"
+    filepath = os.path.join(os.path.dirname(os.path.dirname(__file__)), "content", "samples", "test.jpg")
     loader = UnstructuredPaddleImageLoader(filepath, mode="elements")
     docs = loader.load()
     for doc in docs:
......
@@ -46,7 +46,7 @@ class UnstructuredPaddlePDFLoader(UnstructuredFileLoader):
 if __name__ == "__main__":
-    filepath = "../content/samples/test.pdf"
+    filepath = os.path.join(os.path.dirname(os.path.dirname(__file__)), "content", "samples", "test.pdf")
     loader = UnstructuredPaddlePDFLoader(filepath, mode="elements")
     docs = loader.load()
     for doc in docs:
......
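Both loader self-tests above now resolve the sample file from the module's own location instead of a CWD-dependent "../content/..." path. A small sketch of the idea, using a hypothetical module path purely for illustration:

import os

# Hypothetical stand-in for the loader module's __file__.
module_file = "/repo/loader/pdf_loader.py"

# Two dirname calls climb from loader/<module>.py to the repository root,
# so the sample path is valid no matter where the script is launched from.
repo_root = os.path.dirname(os.path.dirname(module_file))
filepath = os.path.join(repo_root, "content", "samples", "test.pdf")
print(filepath)  # /repo/content/samples/test.pdf regardless of the CWD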