Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
J
jinchat-server
概览
概览
详情
活动
周期分析
版本库
存储库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
aigc-pioneer
jinchat-server
Commits
fc6d4c33
提交
fc6d4c33
authored
6月 13, 2023
作者:
imClumsyPanda
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add delete_doc and update_doc to MyFAISS.py
上级
ecd7b613
显示空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
20 行增加
和
2 行删除
+20
-2
MyFAISS.py
vectorstores/MyFAISS.py
+20
-2
没有找到文件。
vectorstores/MyFAISS.py
浏览文件 @
fc6d4c33
...
@@ -46,6 +46,7 @@ class MyFAISS(FAISS, VectorStore):
...
@@ -46,6 +46,7 @@ class MyFAISS(FAISS, VectorStore):
docs
=
[]
docs
=
[]
id_set
=
set
()
id_set
=
set
()
store_len
=
len
(
self
.
index_to_docstore_id
)
store_len
=
len
(
self
.
index_to_docstore_id
)
rearrange_id_list
=
False
for
j
,
i
in
enumerate
(
indices
[
0
]):
for
j
,
i
in
enumerate
(
indices
[
0
]):
if
i
==
-
1
or
0
<
self
.
score_threshold
<
scores
[
0
][
j
]:
if
i
==
-
1
or
0
<
self
.
score_threshold
<
scores
[
0
][
j
]:
# This happens when not enough docs are returned.
# This happens when not enough docs are returned.
...
@@ -53,11 +54,13 @@ class MyFAISS(FAISS, VectorStore):
...
@@ -53,11 +54,13 @@ class MyFAISS(FAISS, VectorStore):
_id
=
self
.
index_to_docstore_id
[
i
]
_id
=
self
.
index_to_docstore_id
[
i
]
doc
=
self
.
docstore
.
search
(
_id
)
doc
=
self
.
docstore
.
search
(
_id
)
if
(
not
self
.
chunk_conent
)
or
(
"context_expand"
in
doc
.
metadata
and
not
doc
.
metadata
[
"context_expand"
]):
if
(
not
self
.
chunk_conent
)
or
(
"context_expand"
in
doc
.
metadata
and
not
doc
.
metadata
[
"context_expand"
]):
# 匹配出的文本如果不需要扩展上下文则执行如下代码
if
not
isinstance
(
doc
,
Document
):
if
not
isinstance
(
doc
,
Document
):
raise
ValueError
(
f
"Could not find document for id {_id}, got {doc}"
)
raise
ValueError
(
f
"Could not find document for id {_id}, got {doc}"
)
doc
.
metadata
[
"score"
]
=
int
(
scores
[
0
][
j
])
doc
.
metadata
[
"score"
]
=
int
(
scores
[
0
][
j
])
docs
.
append
(
doc
)
docs
.
append
(
doc
)
continue
continue
id_set
.
add
(
i
)
id_set
.
add
(
i
)
docs_len
=
len
(
doc
.
page_content
)
docs_len
=
len
(
doc
.
page_content
)
for
k
in
range
(
1
,
max
(
i
,
store_len
-
i
)):
for
k
in
range
(
1
,
max
(
i
,
store_len
-
i
)):
...
@@ -72,15 +75,17 @@ class MyFAISS(FAISS, VectorStore):
...
@@ -72,15 +75,17 @@ class MyFAISS(FAISS, VectorStore):
if
l
not
in
id_set
and
0
<=
l
<
len
(
self
.
index_to_docstore_id
):
if
l
not
in
id_set
and
0
<=
l
<
len
(
self
.
index_to_docstore_id
):
_id0
=
self
.
index_to_docstore_id
[
l
]
_id0
=
self
.
index_to_docstore_id
[
l
]
doc0
=
self
.
docstore
.
search
(
_id0
)
doc0
=
self
.
docstore
.
search
(
_id0
)
if
docs_len
+
len
(
doc0
.
page_content
)
>
self
.
chunk_size
or
doc0
.
metadata
[
"source"
]
!=
doc
.
metadata
[
"source"
]:
if
docs_len
+
len
(
doc0
.
page_content
)
>
self
.
chunk_size
or
doc0
.
metadata
[
"source"
]
!=
\
doc
.
metadata
[
"source"
]:
break_flag
=
True
break_flag
=
True
break
break
elif
doc0
.
metadata
[
"source"
]
==
doc
.
metadata
[
"source"
]:
elif
doc0
.
metadata
[
"source"
]
==
doc
.
metadata
[
"source"
]:
docs_len
+=
len
(
doc0
.
page_content
)
docs_len
+=
len
(
doc0
.
page_content
)
id_set
.
add
(
l
)
id_set
.
add
(
l
)
rearrange_id_list
=
True
if
break_flag
:
if
break_flag
:
break
break
if
(
not
self
.
chunk_conent
)
or
(
"add_context"
in
doc
.
metadata
and
not
doc
.
metadata
[
"add_context"
]
):
if
(
not
self
.
chunk_conent
)
or
(
not
rearrange_id_list
):
return
docs
return
docs
if
len
(
id_set
)
==
0
and
self
.
score_threshold
>
0
:
if
len
(
id_set
)
==
0
and
self
.
score_threshold
>
0
:
return
[]
return
[]
...
@@ -101,3 +106,16 @@ class MyFAISS(FAISS, VectorStore):
...
@@ -101,3 +106,16 @@ class MyFAISS(FAISS, VectorStore):
doc
.
metadata
[
"score"
]
=
int
(
doc_score
)
doc
.
metadata
[
"score"
]
=
int
(
doc_score
)
docs
.
append
(
doc
)
docs
.
append
(
doc
)
return
docs
return
docs
def delete_doc(self, source):
    """Remove every stored document whose ``metadata["source"]`` equals *source*.

    Matching entries are dropped from both ``self.docstore._dict`` and
    ``self.index_to_docstore_id``.

    Args:
        source: Value compared against each document's ``metadata["source"]``.

    Returns:
        A status string of the form ``"<n> docs deleted"``, where ``n`` is the
        number of documents removed from the docstore.
    """
    # Documents lacking a "source" entry are skipped instead of raising KeyError.
    doomed_ids = {
        doc_id
        for doc_id, doc in self.docstore._dict.items()
        if "source" in doc.metadata and doc.metadata["source"] == source
    }

    # One pass over the position -> id mapping instead of rebuilding the key and
    # value lists (and linearly searching them) for every deleted document.
    stale_positions = [
        position
        for position, doc_id in self.index_to_docstore_id.items()
        if doc_id in doomed_ids
    ]
    for position in stale_positions:
        self.index_to_docstore_id.pop(position)

    for doc_id in doomed_ids:
        self.docstore._dict.pop(doc_id)

    # NOTE(review): only the bookkeeping dicts are updated here; nothing in this
    # method touches the underlying vector index, so the vectors presumably
    # remain there - confirm whether they must be removed as well.
    return f"{len(doomed_ids)} docs deleted"
def update_doc(self, source, new_docs):
    """Replace the documents stored for *source* with *new_docs*.

    Calls ``self.delete_doc(source)`` first, then ``self.add_documents(new_docs)``.

    Args:
        source: Passed to ``delete_doc`` to select the documents to remove.
        new_docs: Passed unchanged to ``self.add_documents``.

    Returns:
        A ``(message, added)`` tuple, where ``added`` is whatever
        ``add_documents`` returned (presumably the new ids - verify against
        the base class) and ``message`` summarises both steps.
    """
    # delete_doc already returns the complete "<n> docs deleted" string, so it is
    # reused as-is; appending " docs deleted" again would duplicate the phrase.
    delete_msg = self.delete_doc(source)
    added = self.add_documents(new_docs)
    return f"{delete_msg}, {len(added)} added", added
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论