# youtube-transcript-api/youtube_transcript_api/test/test_formatters.py

# 146 lines
# 4.9 KiB
# Python

from unittest import TestCase
import json
import pprint
from youtube_transcript_api.formatters import (
Formatter,
JSONFormatter,
TextFormatter,
SRTFormatter,
WebVTTFormatter,
PrettyPrintFormatter,
FormatterLoader,
)
class TestFormatters(TestCase):
    """Unit tests for the transcript formatters and for ``FormatterLoader``.

    Every test works on the small three-entry transcript built in ``setUp``;
    the ``*_many`` tests use a list holding that same transcript twice.
    """

    def setUp(self):
        """Build the single-transcript and multi-transcript fixtures."""
        self.transcript = [
            {"text": "Test line 1", "start": 0.0, "duration": 1.50},
            {"text": "line between", "start": 1.5, "duration": 2.0},
            {"text": "testing the end line", "start": 2.5, "duration": 3.25},
        ]
        self.transcripts = [self.transcript, self.transcript]

    def test_base_formatter_format_call(self):
        """The base ``Formatter`` raises ``NotImplementedError`` for both entry points."""
        with self.assertRaises(NotImplementedError):
            Formatter().format_transcript(self.transcript)
        with self.assertRaises(NotImplementedError):
            Formatter().format_transcripts([self.transcript])

    def test_srt_formatter_starting(self):
        """SRT output opens with the sequence number and the first time range."""
        content = SRTFormatter().format_transcript(self.transcript)
        lines = content.split("\n")

        # test starting lines
        self.assertEqual(lines[0], "1")
        self.assertEqual(lines[1], "00:00:00,000 --> 00:00:01,500")

    def test_srt_formatter_middle(self):
        """The second SRT entry has the expected number, time range and text."""
        content = SRTFormatter().format_transcript(self.transcript)
        lines = content.split("\n")

        # test middle lines
        self.assertEqual(lines[4], "2")
        # The expected end timestamp is 2.5 s (the start of the third fixture
        # entry), not start + duration = 3.5 s.
        self.assertEqual(lines[5], "00:00:01,500 --> 00:00:02,500")
        self.assertEqual(lines[6], self.transcript[1]["text"])

    def test_srt_formatter_ending(self):
        """SRT output ends with the last entry's text followed by a newline."""
        content = SRTFormatter().format_transcript(self.transcript)
        lines = content.split("\n")

        # test ending lines
        self.assertEqual(lines[-2], self.transcript[-1]["text"])
        # A trailing "\n" makes the final element of the split an empty string.
        self.assertEqual(lines[-1], "")

    def test_srt_formatter_many(self):
        """Several SRT transcripts are the single outputs joined by three newlines."""
        formatter = SRTFormatter()

        content = formatter.format_transcripts(self.transcripts)
        formatted_single_transcript = formatter.format_transcript(self.transcript)

        self.assertEqual(
            content,
            formatted_single_transcript + "\n\n\n" + formatted_single_transcript,
        )

    def test_webvtt_formatter_starting(self):
        """WebVTT output opens with the ``WEBVTT`` header and an empty line."""
        content = WebVTTFormatter().format_transcript(self.transcript)
        lines = content.split("\n")

        # test starting lines
        self.assertEqual(lines[0], "WEBVTT")
        self.assertEqual(lines[1], "")

    def test_webvtt_formatter_ending(self):
        """WebVTT output ends with the last entry's text followed by a newline."""
        content = WebVTTFormatter().format_transcript(self.transcript)
        lines = content.split("\n")

        # test ending lines
        self.assertEqual(lines[-2], self.transcript[-1]["text"])
        # A trailing "\n" makes the final element of the split an empty string.
        self.assertEqual(lines[-1], "")

    def test_webvtt_formatter_many(self):
        """Several WebVTT transcripts are the single outputs joined by three newlines."""
        formatter = WebVTTFormatter()

        content = formatter.format_transcripts(self.transcripts)
        formatted_single_transcript = formatter.format_transcript(self.transcript)

        self.assertEqual(
            content,
            formatted_single_transcript + "\n\n\n" + formatted_single_transcript,
        )

    def test_pretty_print_formatter(self):
        """Pretty-print output equals ``pprint.pformat`` of the transcript."""
        content = PrettyPrintFormatter().format_transcript(self.transcript)

        self.assertEqual(content, pprint.pformat(self.transcript))

    def test_pretty_print_formatter_many(self):
        """Pretty-print output equals ``pprint.pformat`` of the transcript list."""
        content = PrettyPrintFormatter().format_transcripts(self.transcripts)

        self.assertEqual(content, pprint.pformat(self.transcripts))

    def test_json_formatter(self):
        """JSON output parses back into the original transcript."""
        content = JSONFormatter().format_transcript(self.transcript)

        self.assertEqual(json.loads(content), self.transcript)

    def test_json_formatter_many(self):
        """JSON output parses back into the original list of transcripts."""
        content = JSONFormatter().format_transcripts(self.transcripts)

        self.assertEqual(json.loads(content), self.transcripts)

    def test_text_formatter(self):
        """Plain-text output starts with the first text and ends with the last."""
        content = TextFormatter().format_transcript(self.transcript)
        lines = content.split("\n")

        self.assertEqual(lines[0], self.transcript[0]["text"])
        self.assertEqual(lines[-1], self.transcript[-1]["text"])

    def test_text_formatter_many(self):
        """Several text transcripts are the single outputs joined by three newlines."""
        formatter = TextFormatter()

        content = formatter.format_transcripts(self.transcripts)
        formatted_single_transcript = formatter.format_transcript(self.transcript)

        self.assertEqual(
            content,
            formatted_single_transcript + "\n\n\n" + formatted_single_transcript,
        )

    def test_formatter_loader(self):
        """Loading ``"json"`` yields a ``JSONFormatter`` instance."""
        loader = FormatterLoader()

        formatter = loader.load("json")

        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)) which only says "False is not true".
        self.assertIsInstance(formatter, JSONFormatter)

    def test_formatter_loader__default_formatter(self):
        """Loading with no argument yields a ``PrettyPrintFormatter`` instance."""
        loader = FormatterLoader()

        formatter = loader.load()

        self.assertIsInstance(formatter, PrettyPrintFormatter)

    def test_formatter_loader__unknown_format(self):
        """Loading an unknown type raises ``FormatterLoader.UnknownFormatterType``."""
        with self.assertRaises(FormatterLoader.UnknownFormatterType):
            FormatterLoader().load("png")