added class SRTFormatter

This commit is contained in:
Liam Sy 2022-10-02 20:39:15 -04:00
parent 8c38df9939
commit 69c5a46016
1 changed files with 53 additions and 0 deletions

View File

@ -134,6 +134,58 @@ class WebVTTFormatter(Formatter):
""" """
return '\n\n\n'.join([self.format_transcript(transcript, **kwargs) for transcript in transcripts]) return '\n\n\n'.join([self.format_transcript(transcript, **kwargs) for transcript in transcripts])
class SRTFormatter(Formatter):
    """Formatter that renders transcripts as SubRip (SRT) subtitle text."""

    def _seconds_to_timestamp(self, time):
        """Helper that converts `time` into a transcript cue timestamp for SRT.

        The value is rounded to whole milliseconds once and then split into
        hours/minutes/seconds/milliseconds with integer arithmetic, so the
        seconds field can never be rounded up independently of the
        milliseconds field.

        :param time: a float representing time in seconds.
        :type time: float
        :return: a string formatted as a cue timestamp, 'HH:MM:SS,mmm'
        :rtype str
        :example:
        >>> self._seconds_to_timestamp(6.93)
        '00:00:06,930'
        """
        # Work in integer milliseconds; formatting fractional seconds with
        # '{:02.0f}' would round 6.93 up to '07'.
        total_ms = int(round(float(time) * 1000))
        total_secs, ms = divmod(total_ms, 1000)
        total_mins, secs = divmod(total_secs, 60)
        hours, mins = divmod(total_mins, 60)
        return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, mins, secs, ms)

    def format_transcript(self, transcript, **kwargs):
        """Converts a transcript into SRT formatting.

        Each cue is emitted as its 1-based index, a 'start --> end' time
        line and the cue text; cues are separated by a blank line and the
        result ends with a newline.

        :param transcript: a sequence of dicts, each providing the keys
            'start' and 'duration' (in seconds) and 'text'.
        :param kwargs: accepted for interface compatibility; not used here.
        :return: the transcript rendered as SRT text.
        :rtype str
        :reference: https://www.3playmedia.com/blog/create-srt-file/
        """
        lines = []
        last_index = len(transcript) - 1
        for i, line in enumerate(transcript):
            end = line['start'] + line['duration']
            if i < last_index:
                # Clamp to the next cue's start so consecutive cues never
                # overlap, without stretching this cue across a silent gap.
                end = min(end, transcript[i + 1]['start'])
            time_text = "{} --> {}".format(
                self._seconds_to_timestamp(line['start']),
                self._seconds_to_timestamp(end)
            )
            lines.append("{}\n{}\n{}".format(i + 1, time_text, line['text']))
        return "\n\n".join(lines) + "\n"

    def format_transcripts(self, transcripts, **kwargs):
        """Converts a list of transcripts into SRT formatting.

        Every transcript is rendered with `format_transcript` and the
        results are joined with three newlines.

        :param transcripts: an iterable of transcripts, each in the form
            accepted by `format_transcript`.
        :param kwargs: forwarded unchanged to `format_transcript`.
        :return: the concatenated SRT text of all transcripts.
        :rtype str
        :reference: https://www.3playmedia.com/blog/create-srt-file/
        """
        return '\n\n\n'.join([self.format_transcript(transcript, **kwargs) for transcript in transcripts])
class FormatterLoader(object): class FormatterLoader(object):
TYPES = { TYPES = {
@ -141,6 +193,7 @@ class FormatterLoader(object):
'pretty': PrettyPrintFormatter, 'pretty': PrettyPrintFormatter,
'text': TextFormatter, 'text': TextFormatter,
'webvtt': WebVTTFormatter, 'webvtt': WebVTTFormatter,
'srt' : SRTFormatter,
} }
class UnknownFormatterType(Exception): class UnknownFormatterType(Exception):