diff --git a/youtube_transcript_api/formatters.py b/youtube_transcript_api/formatters.py
index 986044f..557ec10 100644
--- a/youtube_transcript_api/formatters.py
+++ b/youtube_transcript_api/formatters.py
@@ -134,6 +134,68 @@ class WebVTTFormatter(Formatter):
         """
         return '\n\n\n'.join([self.format_transcript(transcript, **kwargs) for transcript in transcripts])
 
+
+class SRTFormatter(Formatter):
+    """Formats transcripts as SubRip (SRT) subtitle text."""
+
+    def _seconds_to_timestamp(self, time):
+        """Helper that converts `time` into a transcript cue timestamp for SRT.
+
+        The value is rounded to whole milliseconds first and then split with
+        integer arithmetic, so the seconds field is never rounded up by float
+        formatting (6.93 must give ':06,930', not ':07,930').
+
+        :param time: a float representing time in seconds.
+        :type time: float
+        :return: a string formatted as a cue timestamp, 'HH:MM:SS,mmm'
+        :rtype str
+        :example:
+        >>> self._seconds_to_timestamp(6.93)
+        '00:00:06,930'
+        """
+        total_ms = int(round(float(time) * 1000))
+        hours, remainder = divmod(total_ms, 3600000)
+        mins, remainder = divmod(remainder, 60000)
+        secs, ms = divmod(remainder, 1000)
+        return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, mins, secs, ms)
+
+    def format_transcript(self, transcript, **kwargs):
+        """Converts a transcript into SRT formatting.
+
+        Cues are numbered from 1. A cue ends at `start + duration`, unless
+        the next cue starts earlier, in which case it ends at the next cue's
+        start so that consecutive cues do not overlap.
+
+        :param transcript: sequence of items with 'text', 'start', 'duration'.
+        :return: the SRT text, terminated by a newline.
+        :rtype str
+        :reference: https://www.3playmedia.com/blog/create-srt-file/
+        """
+        lines = []
+        for i, line in enumerate(transcript):
+            end = line['start'] + line['duration']
+            if i < len(transcript) - 1:
+                # Clip to the next start time: the duration may run past
+                # it, which would create an overlap between cues.
+                end = min(end, transcript[i + 1]['start'])
+            time_text = "{} --> {}".format(
+                self._seconds_to_timestamp(line['start']),
+                self._seconds_to_timestamp(end)
+            )
+            lines.append("{}\n{}\n{}".format(i + 1, time_text, line['text']))
+
+        return "\n\n".join(lines) + "\n"
+
+    def format_transcripts(self, transcripts, **kwargs):
+        """Converts a list of transcripts into SRT formatting.
+
+        :param transcripts: an iterable of transcripts.
+        :return: the formatted transcripts, joined by '\n\n\n'.
+        :rtype str
+        :reference: https://www.3playmedia.com/blog/create-srt-file/
+        """
+        return '\n\n\n'.join([self.format_transcript(transcript, **kwargs) for transcript in transcripts])
+
 
 class FormatterLoader(object):
     TYPES = {
@@ -141,6 +203,7 @@ class FormatterLoader(object):
         'pretty': PrettyPrintFormatter,
         'text': TextFormatter,
         'webvtt': WebVTTFormatter,
+        'srt': SRTFormatter,
     }
 
     class UnknownFormatterType(Exception):