Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
J
jinchat-server
概览
概览
详情
活动
周期分析
版本库
存储库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
aigc-pioneer
jinchat-server
Commits
33b978b5
提交
33b978b5
authored
7月 17, 2023
作者:
imClumsyPanda
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update README.md
上级
aa944f6e
显示空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
0 行增加
和
155 行删除
+0
-155
embeddings.py
chains/modules/embeddings.py
+0
-34
vectorstores.py
chains/modules/vectorstores.py
+0
-121
qr_code_42.jpg
img/qr_code_42.jpg
+0
-0
没有找到文件。
chains/modules/embeddings.py
deleted
100644 → 0
浏览文件 @
aa944f6e
from
langchain.embeddings.huggingface
import
HuggingFaceEmbeddings
from
typing
import
Any
,
List
class MyEmbeddings(HuggingFaceEmbeddings):
    """HuggingFace embeddings that L2-normalize their output vectors.

    Overrides the base class so every call to ``self.client.encode`` passes
    ``normalize_embeddings=True``.  With unit-norm vectors, inner-product
    similarity equals cosine similarity (useful e.g. with a faiss
    ``IndexFlatIP``) -- presumably why this subclass exists; confirm against
    the vector store it is paired with.
    """

    # NOTE: the original pass-through ``__init__(self, **kwargs)`` that only
    # called ``super().__init__(**kwargs)`` was redundant and has been removed;
    # construction behavior is unchanged.

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute doc embeddings using a HuggingFace transformer model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        # Strip newlines before encoding; the base class does the same
        # (newlines can degrade sentence-transformer embedding quality).
        texts = [text.replace("\n", " ") for text in texts]
        embeddings = self.client.encode(texts, normalize_embeddings=True)
        return embeddings.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Compute query embeddings using a HuggingFace transformer model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        text = text.replace("\n", " ")
        embedding = self.client.encode(text, normalize_embeddings=True)
        return embedding.tolist()
chains/modules/vectorstores.py
deleted
100644 → 0
浏览文件 @
aa944f6e
from
langchain.vectorstores
import
FAISS
from
typing
import
Any
,
Callable
,
List
,
Optional
,
Tuple
,
Dict
from
langchain.docstore.document
import
Document
from
langchain.docstore.base
import
Docstore
from
langchain.vectorstores.utils
import
maximal_marginal_relevance
from
langchain.embeddings.base
import
Embeddings
import
uuid
from
langchain.docstore.in_memory
import
InMemoryDocstore
import
numpy
as
np
def dependable_faiss_import() -> Any:
    """Return the ``faiss`` module, raising a helpful error when it is absent.

    Raises:
        ValueError: if the faiss package is not installed.
    """
    try:
        import faiss
    except ImportError:
        # Keep the install hint in the error so users know which wheel to pick.
        message = (
            "Could not import faiss python package. "
            "Please install it with `pip install faiss` "
            "or `pip install faiss-cpu` (depending on Python version)."
        )
        raise ValueError(message)
    else:
        return faiss
class FAISSVS(FAISS):
    """FAISS vector store whose MMR searches return ``(Document, score)`` pairs.

    Builds on langchain's ``FAISS`` store.  The private ``__from`` constructor
    creates an inner-product index (``IndexFlatIP``) -- this assumes the stored
    embeddings are normalized so that inner product equals cosine similarity;
    TODO confirm against the embedding class used with this store.
    """

    def __init__(self,
                 embedding_function: Callable[..., Any],
                 index: Any,
                 docstore: Docstore,
                 index_to_docstore_id: Dict[int, str]):
        # Pure pass-through to the FAISS base-class constructor.
        super().__init__(embedding_function, index, docstore, index_to_docstore_id)

    def max_marginal_relevance_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        fetch_k: int = 20,
        **kwargs: Any,  # NOTE(review): accepted but never used
    ) -> List[Tuple[Document, float]]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.

        Returns:
            List of Documents with scores selected by maximal marginal relevance.
        """
        # Over-fetch fetch_k candidates; MMR then narrows them down to k.
        scores, indices = self.index.search(
            np.array([embedding], dtype=np.float32), fetch_k)
        # -1 happens when not enough docs are returned.
        embeddings = [self.index.reconstruct(int(i)) for i in indices[0] if i != -1]
        mmr_selected = maximal_marginal_relevance(
            np.array([embedding], dtype=np.float32), embeddings, k=k)
        # Map MMR's positions (into the candidate list) back to faiss ids/scores.
        selected_indices = [indices[0][i] for i in mmr_selected]
        selected_scores = [scores[0][i] for i in mmr_selected]
        docs = []
        for i, score in zip(selected_indices, selected_scores):
            if i == -1:
                # This happens when not enough docs are returned.
                continue
            _id = self.index_to_docstore_id[i]
            doc = self.docstore.search(_id)
            if not isinstance(doc, Document):
                raise ValueError(f"Could not find document for id {_id}, got {doc}")
            docs.append((doc, score))
        return docs

    def max_marginal_relevance_search(
        self,
        query: str,
        k: int = 4,
        fetch_k: int = 20,
        **kwargs: Any,  # NOTE(review): accepted but not forwarded to the vector search
    ) -> List[Tuple[Document, float]]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.

        Returns:
            List of Documents with scores selected by maximal marginal relevance.
        """
        # Embed the query text, then delegate to the by-vector variant.
        embedding = self.embedding_function(query)
        docs = self.max_marginal_relevance_search_by_vector(embedding, k, fetch_k)
        return docs

    # NOTE(review): name mangling makes this ``_FAISSVS__from``.  If the base
    # ``FAISS`` class's factory methods (e.g. ``from_texts``) call their own
    # ``cls.__from``, that call resolves to ``_FAISS__from`` and will NOT
    # dispatch to this override -- verify this method is actually reached.
    @classmethod
    def __from(
        cls,
        texts: List[str],
        embeddings: List[List[float]],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> FAISS:
        faiss = dependable_faiss_import()
        # Inner-product (cosine-on-normalized-vectors) flat index over the
        # precomputed embeddings; dimensionality taken from the first vector.
        index = faiss.IndexFlatIP(len(embeddings[0]))
        index.add(np.array(embeddings, dtype=np.float32))
        # # my code, for speeding up search
        # quantizer = faiss.IndexFlatL2(len(embeddings[0]))
        # index = faiss.IndexIVFFlat(quantizer, len(embeddings[0]), 100)
        # index.train(np.array(embeddings, dtype=np.float32))
        # index.add(np.array(embeddings, dtype=np.float32))
        documents = []
        for i, text in enumerate(texts):
            # Missing metadatas default each document to an empty metadata dict.
            metadata = metadatas[i] if metadatas else {}
            documents.append(Document(page_content=text, metadata=metadata))
        # Random UUIDs link faiss row positions to docstore entries.
        index_to_id = {i: str(uuid.uuid4()) for i in range(len(documents))}
        docstore = InMemoryDocstore(
            {index_to_id[i]: doc for i, doc in enumerate(documents)})
        return cls(embedding.embed_query, index, docstore, index_to_id)
img/qr_code_42.jpg
deleted
100644 → 0
浏览文件 @
aa944f6e
273.1 KB
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论