提交 11dd2b5b 作者: imClumsyPanda

update api.py

上级 a887df17
...@@ -141,9 +141,9 @@ async def upload_files( ...@@ -141,9 +141,9 @@ async def upload_files(
if filelist: if filelist:
vs_path, loaded_files = local_doc_qa.init_knowledge_vector_store(filelist, get_vs_path(knowledge_base_id)) vs_path, loaded_files = local_doc_qa.init_knowledge_vector_store(filelist, get_vs_path(knowledge_base_id))
if len(loaded_files): if len(loaded_files):
file_status = f"已上传 {'、'.join([os.path.split(i)[-1] for i in loaded_files])} 至知识库,并已加载知识库,请开始提问" file_status = f"documents {', '.join([os.path.split(i)[-1] for i in loaded_files])} upload success"
return BaseResponse(code=200, msg=file_status) return BaseResponse(code=200, msg=file_status)
file_status = "文件未成功加载,请重新上传文件" file_status = f"documents {', '.join([os.path.split(i)[-1] for i in loaded_files])} upload fail"
return BaseResponse(code=500, msg=file_status) return BaseResponse(code=500, msg=file_status)
...@@ -176,7 +176,7 @@ async def list_docs( ...@@ -176,7 +176,7 @@ async def list_docs(
return ListDocsResponse(data=all_doc_names) return ListDocsResponse(data=all_doc_names)
async def delete_kbs( async def delete_kb(
knowledge_base_id: str = Query(..., knowledge_base_id: str = Query(...,
description="Knowledge Base Name", description="Knowledge Base Name",
example="kb1"), example="kb1"),
...@@ -189,7 +189,7 @@ async def delete_kbs( ...@@ -189,7 +189,7 @@ async def delete_kbs(
return BaseResponse(code=200, msg=f"Knowledge Base {knowledge_base_id} delete success") return BaseResponse(code=200, msg=f"Knowledge Base {knowledge_base_id} delete success")
async def delete_docs( async def delete_doc(
knowledge_base_id: str = Query(..., knowledge_base_id: str = Query(...,
description="Knowledge Base Name", description="Knowledge Base Name",
example="kb1"), example="kb1"),
...@@ -197,28 +197,72 @@ async def delete_docs( ...@@ -197,28 +197,72 @@ async def delete_docs(
None, description="doc name", example="doc_name_1.pdf" None, description="doc name", example="doc_name_1.pdf"
), ),
): ):
# TODO: 确认是否支持批量删除文件
knowledge_base_id = urllib.parse.unquote(knowledge_base_id) knowledge_base_id = urllib.parse.unquote(knowledge_base_id)
if not os.path.exists(get_folder_path(knowledge_base_id)): if not os.path.exists(get_folder_path(knowledge_base_id)):
return {"code": 1, "msg": f"Knowledge base {knowledge_base_id} not found"} return {"code": 1, "msg": f"Knowledge base {knowledge_base_id} not found"}
doc_path = get_file_path(knowledge_base_id, doc_name) doc_path = get_file_path(knowledge_base_id, doc_name)
if os.path.exists(doc_path): if os.path.exists(doc_path):
os.remove(doc_path) os.remove(doc_path)
# 删除上传的文件后重新生成知识库(FAISS)内的数据
# TODO: 删除向量库中对应文件
remain_docs = await list_docs(knowledge_base_id) remain_docs = await list_docs(knowledge_base_id)
if len(remain_docs.data) == 0: if len(remain_docs.data) == 0:
shutil.rmtree(get_folder_path(knowledge_base_id), ignore_errors=True) shutil.rmtree(get_folder_path(knowledge_base_id), ignore_errors=True)
return BaseResponse(code=200, msg=f"document {doc_name} delete success")
else: else:
local_doc_qa.init_knowledge_vector_store( status = local_doc_qa.delete_file_from_vector_store(doc_path, get_vs_path(knowledge_base_id))
get_folder_path(knowledge_base_id), get_vs_path(knowledge_base_id) if "success" in status:
) return BaseResponse(code=200, msg=f"document {doc_name} delete success")
return BaseResponse(code=200, msg=f"document {doc_name} delete success") else:
return BaseResponse(code=1, msg=f"document {doc_name} delete fail")
else: else:
return BaseResponse(code=1, msg=f"document {doc_name} not found") return BaseResponse(code=1, msg=f"document {doc_name} not found")
async def update_doc(
knowledge_base_id: str = Query(...,
description="知识库名",
example="kb1"),
old_doc: str = Query(
None, description="待删除文件名,已存储在知识库中", example="doc_name_1.pdf"
),
new_doc: UploadFile = File(description="待上传文件"),
):
knowledge_base_id = urllib.parse.unquote(knowledge_base_id)
if not os.path.exists(get_folder_path(knowledge_base_id)):
return {"code": 1, "msg": f"Knowledge base {knowledge_base_id} not found"}
doc_path = get_file_path(knowledge_base_id, old_doc)
if not os.path.exists(doc_path):
return BaseResponse(code=1, msg=f"document {old_doc} not found")
else:
os.remove(doc_path)
delete_status = local_doc_qa.delete_file_from_vector_store(doc_path, get_vs_path(knowledge_base_id))
if "fail" in delete_status:
return BaseResponse(code=1, msg=f"document {old_doc} delete failed")
else:
saved_path = get_folder_path(knowledge_base_id)
if not os.path.exists(saved_path):
os.makedirs(saved_path)
file_content = await new_doc.read() # 读取上传文件的内容
file_path = os.path.join(saved_path, new_doc.filename)
if os.path.exists(file_path) and os.path.getsize(file_path) == len(file_content):
file_status = f"document {new_doc.filename} already exists"
return BaseResponse(code=200, msg=file_status)
with open(file_path, "wb") as f:
f.write(file_content)
vs_path = get_vs_path(knowledge_base_id)
vs_path, loaded_files = local_doc_qa.init_knowledge_vector_store([file_path], vs_path)
if len(loaded_files) > 0:
file_status = f"document {old_doc} delete and document {new_doc.filename} upload success"
return BaseResponse(code=200, msg=file_status)
else:
file_status = f"document {old_doc} success but document {new_doc.filename} upload fail"
return BaseResponse(code=500, msg=file_status)
async def local_doc_chat( async def local_doc_chat(
knowledge_base_id: str = Body(..., description="Knowledge Base Name", example="kb1"), knowledge_base_id: str = Body(..., description="Knowledge Base Name", example="kb1"),
question: str = Body(..., description="Question", example="工伤保险是什么?"), question: str = Body(..., description="Question", example="工伤保险是什么?"),
...@@ -394,8 +438,11 @@ def api_start(host, port): ...@@ -394,8 +438,11 @@ def api_start(host, port):
app.post("/local_doc_qa/upload_files", response_model=BaseResponse)(upload_files) app.post("/local_doc_qa/upload_files", response_model=BaseResponse)(upload_files)
app.post("/local_doc_qa/local_doc_chat", response_model=ChatMessage)(local_doc_chat) app.post("/local_doc_qa/local_doc_chat", response_model=ChatMessage)(local_doc_chat)
app.post("/local_doc_qa/bing_search_chat", response_model=ChatMessage)(bing_search_chat) app.post("/local_doc_qa/bing_search_chat", response_model=ChatMessage)(bing_search_chat)
app.get("/local_doc_qa/list_knowledge_base", response_model=ListDocsResponse)(list_kbs)
app.get("/local_doc_qa/list_files", response_model=ListDocsResponse)(list_docs) app.get("/local_doc_qa/list_files", response_model=ListDocsResponse)(list_docs)
app.delete("/local_doc_qa/delete_file", response_model=BaseResponse)(delete_docs) app.delete("/local_doc_qa/delete_knowledge_base", response_model=BaseResponse)(delete_kb)
app.delete("/local_doc_qa/delete_file", response_model=BaseResponse)(delete_doc)
app.post("/local_doc_qa/update_file", response_model=BaseResponse)(update_doc)
local_doc_qa = LocalDocQA() local_doc_qa = LocalDocQA()
local_doc_qa.init_cfg( local_doc_qa.init_cfg(
......
...@@ -282,6 +282,21 @@ class LocalDocQA: ...@@ -282,6 +282,21 @@ class LocalDocQA:
"source_documents": result_docs} "source_documents": result_docs}
yield response, history yield response, history
def delete_file_from_vector_store(self,
filepath: str or List[str],
vs_path):
vector_store = load_vector_store(vs_path, self.embeddings)
status = vector_store.delete_doc(filepath)
return status
def update_file_from_vector_store(self,
filepath: str or List[str],
vs_path,
docs: List[Document],):
vector_store = load_vector_store(vs_path, self.embeddings)
status = vector_store.update_doc(filepath, docs)
return status
if __name__ == "__main__": if __name__ == "__main__":
# 初始化消息 # 初始化消息
......
...@@ -108,14 +108,20 @@ class MyFAISS(FAISS, VectorStore): ...@@ -108,14 +108,20 @@ class MyFAISS(FAISS, VectorStore):
return docs return docs
def delete_doc(self, source): def delete_doc(self, source):
ids = [k for k, v in self.docstore._dict.items() if v.metadata["source"] == source] try:
for id in ids: ids = [k for k, v in self.docstore._dict.items() if v.metadata["source"] == source]
index = list(self.index_to_docstore_id.keys())[list(self.index_to_docstore_id.values()).index(id)] for id in ids:
self.index_to_docstore_id.pop(index) index = list(self.index_to_docstore_id.keys())[list(self.index_to_docstore_id.values()).index(id)]
self.docstore._dict.pop(id) self.index_to_docstore_id.pop(index)
return f"{len(ids)} docs deleted" self.docstore._dict.pop(id)
return f"docs delete success"
except:
return f"docs delete fail"
def update_doc(self, source, new_docs): def update_doc(self, source, new_docs):
delete_len = self.delete_doc(source) try:
ls = self.add_documents(new_docs) delete_len = self.delete_doc(source)
return f"{delete_len} docs deleted, {len(ls)} added", ls ls = self.add_documents(new_docs)
return f"docs update success"
except:
return f"docs update fail"
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论