updated WebVTT and SRT formatters
This commit is contained in:
parent
3b2e6e253d
commit
68ca703ae0
|
@ -79,8 +79,19 @@ class TextFormatter(Formatter):
|
|||
"""
|
||||
return '\n\n\n'.join([self.format_transcript(transcript, **kwargs) for transcript in transcripts])
|
||||
|
||||
class _TextBasedFormatter(TextFormatter):
    """Shared implementation for cue-based text formats (SRT and WebVTT).

    The cue timing and overall layout are produced here. Subclasses supply
    three format-specific hooks: `_format_timestamp`,
    `_format_transcript_helper` and `_format_transcript_header`.
    """

    def _format_timestamp(self, hours, mins, secs, ms):
        """Render integer time components as a format-specific timestamp.

        :param hours: whole hours.
        :param mins: whole minutes.
        :param secs: whole seconds.
        :param ms: whole milliseconds.
        :raises NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError('A subclass of _TextBasedFormatter must implement '
                                  'their own _format_timestamp method.')

    def _format_transcript_header(self, lines):
        """Combine the formatted cues into the final document text.

        :param lines: list of cue strings built by `_format_transcript_helper`.
        :raises NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError('A subclass of _TextBasedFormatter must implement '
                                  'their own _format_transcript_header method.')

    def _format_transcript_helper(self, i, time_text, line):
        """Render a single cue.

        :param i: zero-based position of the cue within the transcript.
        :param time_text: the cue's 'start --> end' timing line.
        :param line: the transcript entry the cue is built from.
        :raises NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError('A subclass of _TextBasedFormatter must implement '
                                  'their own _format_transcript_helper method.')

    def _seconds_to_timestamp(self, time):
        """Helper that converts `time` into a transcript cue timestamp.

        :param time: time in seconds; anything `float()` accepts.
        :return: the timestamp as rendered by `_format_timestamp`.
        :example:
        >>> self._seconds_to_timestamp(6.93)  # with the WebVTT hook
        '00:00:06.930'
        """
        # Work in whole milliseconds: the components are then plain ints, so
        # seconds are never rounded up by a float format code and the
        # millisecond field can never reach 1000 (the carry moves into the
        # seconds instead).
        total_ms = int(round(float(time) * 1000))
        hours, remainder = divmod(total_ms, 3600000)
        mins, remainder = divmod(remainder, 60000)
        secs, ms = divmod(remainder, 1000)
        return self._format_timestamp(hours, mins, secs, ms)

    def format_transcript(self, transcript, **kwargs):
        """A basic implementation of WEBVTT/SRT formatting.

        :param transcript: sequence of entries; each must provide 'start'
            and 'duration' (in seconds). Any further keys are consumed by the
            subclass' `_format_transcript_helper`.
        :return: the document produced by `_format_transcript_header`.
        :reference:
            https://www.w3.org/TR/webvtt1/#introduction-caption
            https://www.3playmedia.com/blog/create-srt-file/
        """
        lines = []
        last_index = len(transcript) - 1
        for i, line in enumerate(transcript):
            end = line['start'] + line['duration']
            if i < last_index:
                # Never let a cue run past the start of the next one, but do
                # not stretch it across a gap either.
                end = min(end, transcript[i + 1]['start'])
            time_text = "{} --> {}".format(
                self._seconds_to_timestamp(line['start']),
                self._seconds_to_timestamp(end)
            )
            lines.append(self._format_transcript_helper(i, time_text, line))

        return self._format_transcript_header(lines)

    def format_transcripts(self, transcripts, **kwargs):
        """Format several transcripts into one string.

        Each transcript is rendered with `format_transcript`; the results are
        joined with two blank lines between them.

        :param transcripts: iterable of transcripts.
        """
        return '\n\n\n'.join([self.format_transcript(transcript, **kwargs) for transcript in transcripts])
|
||||
|
||||
class SRTFormatter(_TextBasedFormatter):
    """SRT flavour of the cue-based formatter.

    Only the format-specific hooks live here: timestamps use a comma before
    the milliseconds, every cue is prefixed with its 1-based number, and the
    document has no header line.

    :reference: https://www.3playmedia.com/blog/create-srt-file/
    """

    def _format_timestamp(self, hours, mins, secs, ms):
        """Return the timestamp as 'HH:MM:SS,mmm' (all arguments are ints)."""
        return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, mins, secs, ms)

    def _format_transcript_header(self, lines):
        """Join the cues with one blank line between them and end with a newline."""
        return "\n\n".join(lines) + "\n"

    def _format_transcript_helper(self, i, time_text, line):
        """Return one cue: 1-based cue number, timing line, then the cue text.

        :param i: zero-based index of the cue.
        :param time_text: the 'start --> end' timing line.
        :param line: transcript entry; its 'text' value becomes the cue body.
        """
        return "{}\n{}\n{}".format(i + 1, time_text, line['text'])
|
||||
|
||||
class WebVTTFormatter(_TextBasedFormatter):
    """WebVTT flavour of the cue-based formatter.

    Supplies the three format-specific hooks: dot-separated milliseconds,
    cues without a number line, and a leading "WEBVTT" header.
    """

    def _format_timestamp(self, hours, mins, secs, ms):
        """Return the timestamp as 'HH:MM:SS.mmm' (all arguments are ints)."""
        clock = ":".join("{:02d}".format(part) for part in (hours, mins, secs))
        return "{}.{:03d}".format(clock, ms)

    def _format_transcript_header(self, lines):
        """Prefix the blank-line-separated cues with the WEBVTT header line."""
        body = "\n\n".join(lines)
        return "WEBVTT\n\n" + body + "\n"

    def _format_transcript_helper(self, i, time_text, line):
        """Return one cue: the timing line followed by the cue text.

        The cue index `i` is accepted for hook compatibility but not used.
        """
        caption = line['text']
        return "{}\n{}".format(time_text, caption)
|
||||
|
||||
|
||||
class FormatterLoader(object):
|
||||
|
|
Loading…
Reference in New Issue