提交 cf07c014 作者: glide-the

Build an abstract dialogue model using classes and methods to represent…

Build an abstract dialogue model using classes and methods to represent different dialogue elements.
This class serves as a fundamental framework for constructing dialogue models.
上级 25e5ab12
from .image_loader import UnstructuredPaddleImageLoader
from .pdf_loader import UnstructuredPaddlePDFLoader
from .dialogue import (
Person,
Dialogue,
Turn,
DialogueLoader
)
# Names exported by ``from <package> import *``. Person, Dialogue and Turn are
# imported above but are intentionally not listed here, so only the loader
# classes are part of the star-import surface.
__all__ = [
    "UnstructuredPaddleImageLoader",
    "UnstructuredPaddlePDFLoader",
    "DialogueLoader",
]
import json
from abc import ABC
from typing import List
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
class Person:
    """
    A participant in a dialogue.

    :param name: display name of the speaker.
    :param age: age of the speaker; defaults to ``None`` because the dialogue
        parser in this file never knows it.
    """

    def __init__(self, name, age=None):
        self.name = name
        self.age = age

    def __repr__(self):
        # Helps when inspecting parsed turns in logs or a debugger.
        return f"{type(self).__name__}(name={self.name!r}, age={self.age!r})"
class Dialogue:
    """
    Build an abstract dialogue model using classes and methods to represent different dialogue elements.
    This class serves as a fundamental framework for constructing dialogue models.
    """

    def __init__(self, file_path: str):
        """
        :param file_path: path of the dialogue text file read by ``parse_dialogue``.
        """
        self.file_path = file_path
        # Turns in the order they were added / parsed.
        self.turns = []

    def add_turn(self, turn):
        """
        Append one turn to the dialogue.

        :param turn: object exposing ``speaker`` (with a ``name``) and ``message``.
        :return: None
        """
        self.turns.append(turn)

    def parse_dialogue(self):
        """
        Read ``self.file_path`` and append one Turn per speaker/message pair.

        Blank lines are skipped. The remaining lines are consumed in pairs:
        a ``<speaker>:`` line followed by a single message line. Anything
        after the first colon on the speaker line is ignored. One Person is
        created per distinct speaker name and shared between that speaker's
        turns. A trailing speaker line with no message is dropped.

        :raises ValueError: if a line expected to name a speaker has no ``:``.
        """
        participants = {}
        speaker_name = None

        with open(self.file_path, encoding='utf-8') as file:
            for line_number, raw_line in enumerate(file, start=1):
                line = raw_line.strip()
                if not line:
                    continue

                if speaker_name is None:
                    name, separator, _ = line.partition(':')
                    if not separator:
                        # Same exception type the bare tuple-unpack used to
                        # raise, but pointing at the offending line.
                        raise ValueError(
                            f"{self.file_path}:{line_number}: expected "
                            f"'<speaker>:' but found {line!r}"
                        )
                    # Strip so "Alice :" and "Alice:" are the same participant.
                    speaker_name = name.strip()
                    continue

                # A speaker is pending, so this line is that speaker's message.
                if speaker_name not in participants:
                    participants[speaker_name] = Person(speaker_name, None)
                self.add_turn(Turn(participants[speaker_name], line))

                # Reset for the next speaker/message pair.
                speaker_name = None

    def display(self):
        """Print every turn as ``<speaker>: <message>``."""
        for turn in self.turns:
            print(f"{turn.speaker.name}: {turn.message}")

    def export_to_file(self, file_path):
        """
        Write every turn as one ``<speaker>: <message>`` line.

        :param file_path: destination path; overwritten if it exists.
        """
        with open(file_path, 'w', encoding='utf-8') as file:
            for turn in self.turns:
                file.write(f"{turn.speaker.name}: {turn.message}\n")

    def to_dict(self):
        """
        :return: ``{"turns": [{"speaker": name, "message": text}, ...]}``.
        """
        return {
            "turns": [
                {"speaker": turn.speaker.name, "message": turn.message}
                for turn in self.turns
            ]
        }

    def to_json(self):
        """
        :return: the ``to_dict`` structure as an indented JSON string with
            non-ASCII characters kept as-is.
        """
        return json.dumps(self.to_dict(), ensure_ascii=False, indent=2)

    def participants_to_export(self):
        """
        List the distinct speaker names of the dialogue.

        :return: names joined with ``', '`` in order of first appearance.
        """
        # dict.fromkeys de-duplicates while keeping insertion order; a set
        # would make the order differ between interpreter runs.
        return ', '.join(dict.fromkeys(turn.speaker.name for turn in self.turns))
class Turn:
    """
    One utterance in a dialogue.

    :param speaker: the participant who spoke; the rest of this file reads
        its ``name`` attribute.
    :param message: the text that was said.
    """

    def __init__(self, speaker, message):
        self.speaker = speaker
        self.message = message

    def __repr__(self):
        # Helps when inspecting parsed turns in logs or a debugger.
        return f"{type(self).__name__}(speaker={self.speaker!r}, message={self.message!r})"
class DialogueLoader(BaseLoader, ABC):
    """Document loader that turns a dialogue file into one Document per turn."""

    def __init__(self, file_path: str):
        """Parse the dialogue file at ``file_path`` immediately and keep the result."""
        self.file_path = file_path
        parsed = Dialogue(file_path=file_path)
        parsed.parse_dialogue()
        self.dialogue = parsed

    def load(self) -> List[Document]:
        """
        Convert the parsed dialogue into documents.

        :return: one Document per turn; ``page_content`` is the message and
            ``metadata["source"]`` is a single string holding the file path,
            the turn's speaker and the joined participant names.
        """
        # The participant summary is the same for every turn, so build it once.
        participants = self.dialogue.participants_to_export()
        return [
            Document(
                page_content=turn.message,
                metadata={"source": f"Dialogue File:{self.dialogue.file_path},"
                                    f"speaker:{turn.speaker.name},"
                                    f"participant:{participants}"},
            )
            for turn in self.dialogue.turns
        ]
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论