Revert "Add formatters module"
Reverting again; apparently I misunderstood how to revert, so I am trying again.
This reverts commit dca4021dd7.
This commit is contained in:
parent
d75ad8c402
commit
1e9b2c7727
4
setup.py
4
setup.py
|
@ -24,10 +24,10 @@ def get_test_suite():
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name="youtube_transcript_api",
|
name="youtube_transcript_api",
|
||||||
version="0.3.0",
|
version="0.3.1",
|
||||||
author="Jonas Depoix",
|
author="Jonas Depoix",
|
||||||
author_email="jonas.depoix@web.de",
|
author_email="jonas.depoix@web.de",
|
||||||
description="This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. It also works for automatically generated subtitles and it does not require a headless browser, like other selenium based solutions do!",
|
description="This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. It also works for automatically generated subtitles, supports translating subtitles and it does not require a headless browser, like other selenium based solutions do!",
|
||||||
long_description=get_long_description(),
|
long_description=get_long_description(),
|
||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
keywords="youtube-api subtitles youtube transcripts transcript subtitle youtube-subtitles youtube-transcripts cli",
|
keywords="youtube-api subtitles youtube transcripts transcript subtitle youtube-subtitles youtube-transcripts cli",
|
||||||
|
|
|
@ -12,8 +12,6 @@ from ._errors import (
|
||||||
CookiePathInvalid,
|
CookiePathInvalid,
|
||||||
CookiesInvalid
|
CookiesInvalid
|
||||||
)
|
)
|
||||||
from .formatters import formats
|
|
||||||
|
|
||||||
|
|
||||||
class YouTubeTranscriptApi():
|
class YouTubeTranscriptApi():
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -72,8 +70,7 @@ class YouTubeTranscriptApi():
|
||||||
return TranscriptListFetcher(http_client).fetch(video_id)
|
return TranscriptListFetcher(http_client).fetch(video_id)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_transcripts(cls, video_ids, languages=('en',),
|
def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None, cookies=None):
|
||||||
continue_after_error=False, proxies=None, cookies=None, format=None):
|
|
||||||
"""
|
"""
|
||||||
Retrieves the transcripts for a list of videos.
|
Retrieves the transcripts for a list of videos.
|
||||||
|
|
||||||
|
@ -99,8 +96,7 @@ class YouTubeTranscriptApi():
|
||||||
|
|
||||||
for video_id in video_ids:
|
for video_id in video_ids:
|
||||||
try:
|
try:
|
||||||
data[video_id] = cls.get_transcript(video_id, languages,
|
data[video_id] = cls.get_transcript(video_id, languages, proxies, cookies)
|
||||||
proxies, cookies, format=format)
|
|
||||||
except Exception as exception:
|
except Exception as exception:
|
||||||
if not continue_after_error:
|
if not continue_after_error:
|
||||||
raise exception
|
raise exception
|
||||||
|
@ -110,8 +106,7 @@ class YouTubeTranscriptApi():
|
||||||
return data, unretrievable_videos
|
return data, unretrievable_videos
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_transcript(cls, video_id, languages=('en',), proxies=None,
|
def get_transcript(cls, video_id, languages=('en',), proxies=None, cookies=None):
|
||||||
cookies=None, format=None):
|
|
||||||
"""
|
"""
|
||||||
Retrieves the transcript for a single video. This is just a shortcut for calling::
|
Retrieves the transcript for a single video. This is just a shortcut for calling::
|
||||||
|
|
||||||
|
@ -130,10 +125,7 @@ class YouTubeTranscriptApi():
|
||||||
:return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
|
:return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
|
||||||
:rtype [{'text': str, 'start': float, 'end': float}]:
|
:rtype [{'text': str, 'start': float, 'end': float}]:
|
||||||
"""
|
"""
|
||||||
Formatter = formats.get_formatter(format)
|
return cls.list_transcripts(video_id, proxies, cookies).find_transcript(languages).fetch()
|
||||||
transcript = cls.list_transcripts(
|
|
||||||
video_id,proxies, cookies).find_transcript(languages).fetch()
|
|
||||||
return Formatter.format(transcript)
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _load_cookies(cls, cookies, video_id):
|
def _load_cookies(cls, cookies, video_id):
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
import pprint
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
from ._api import YouTubeTranscriptApi
|
from ._api import YouTubeTranscriptApi
|
||||||
from .formatters import formats
|
|
||||||
|
|
||||||
|
|
||||||
class YouTubeTranscriptCli():
|
class YouTubeTranscriptCli():
|
||||||
|
@ -25,24 +26,19 @@ class YouTubeTranscriptCli():
|
||||||
transcripts = []
|
transcripts = []
|
||||||
exceptions = []
|
exceptions = []
|
||||||
|
|
||||||
Formatter = formats.get_formatter(parsed_args.format)
|
|
||||||
|
|
||||||
for video_id in parsed_args.video_ids:
|
for video_id in parsed_args.video_ids:
|
||||||
try:
|
try:
|
||||||
transcript = self._fetch_transcript(
|
transcripts.append(self._fetch_transcript(parsed_args, proxies, cookies, video_id))
|
||||||
parsed_args, proxies, cookies, video_id)
|
|
||||||
transcripts.append(Formatter.format(transcript))
|
|
||||||
except Exception as exception:
|
except Exception as exception:
|
||||||
exceptions.append(exception)
|
exceptions.append(exception)
|
||||||
|
|
||||||
return ''.join(
|
return '\n\n'.join(
|
||||||
[str(exception) for exception in exceptions]
|
[str(exception) for exception in exceptions]
|
||||||
+ ([Formatter.combine(transcripts)] if transcripts else [])
|
+ ([json.dumps(transcripts) if parsed_args.json else pprint.pformat(transcripts)] if transcripts else [])
|
||||||
)
|
)
|
||||||
|
|
||||||
def _fetch_transcript(self, parsed_args, proxies, cookies, video_id):
|
def _fetch_transcript(self, parsed_args, proxies, cookies, video_id):
|
||||||
transcript_list = YouTubeTranscriptApi.list_transcripts(
|
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies, cookies=cookies)
|
||||||
video_id, proxies=proxies, cookies=cookies)
|
|
||||||
|
|
||||||
if parsed_args.list_transcripts:
|
if parsed_args.list_transcripts:
|
||||||
return str(transcript_list)
|
return str(transcript_list)
|
||||||
|
@ -102,9 +98,11 @@ class YouTubeTranscriptCli():
|
||||||
help='If this flag is set transcripts which have been manually created will not be retrieved.',
|
help='If this flag is set transcripts which have been manually created will not be retrieved.',
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--format',
|
'--json',
|
||||||
default=None,
|
action='store_const',
|
||||||
help="Use this flag to set which parser format to use, default is 'json'",
|
const=True,
|
||||||
|
default=False,
|
||||||
|
help='If this flag is set the output will be JSON formatted.',
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--translate',
|
'--translate',
|
||||||
|
|
|
@ -1,174 +0,0 @@
|
||||||
from collections import defaultdict
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
def parse_timecode(time):
|
|
||||||
"""Converts a `time` into a formatted transcript timecode.
|
|
||||||
|
|
||||||
:param time: a float representing time in seconds.
|
|
||||||
:type time: float
|
|
||||||
:return: a string formatted as a timecode, 'HH:MM:SS,MS'
|
|
||||||
:rtype str
|
|
||||||
|
|
||||||
:example:
|
|
||||||
>>> parse_timecode(6.93)
|
|
||||||
'00:00:06,930'
|
|
||||||
"""
|
|
||||||
|
|
||||||
time = float(time)
|
|
||||||
times = {
|
|
||||||
'hours': str(int(time) // 3600).rjust(2, '0'),
|
|
||||||
'mins': str(int(time) // 60).rjust(2, '0'),
|
|
||||||
'secs': str(int(time) % 60).rjust(2, '0'),
|
|
||||||
'ms': str(int(round((time - int(time))*1000, 2))).rjust(3, '0')
|
|
||||||
}
|
|
||||||
return "{hours}:{mins}:{secs},{ms}".format(**times)
|
|
||||||
|
|
||||||
|
|
||||||
class TranscriptFormatter(object):
|
|
||||||
"""Abstract Base TranscriptFormatter class
|
|
||||||
|
|
||||||
This class should be inherited from to create additional
|
|
||||||
custom transcript formatters.
|
|
||||||
"""
|
|
||||||
HTML_TAG_REGEX = re.compile(r'<[^>]*>', re.IGNORECASE)
|
|
||||||
DELIMITER = ''
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def combine(cls, transcripts):
|
|
||||||
"""Subclass may override this class method.
|
|
||||||
|
|
||||||
Default behavior of this method will ''.join() the str()
|
|
||||||
of each transcript in transcripts.
|
|
||||||
|
|
||||||
:param transcripts: a list of many transcripts
|
|
||||||
:type transcript_data: list[<formatted transcript>, ...]
|
|
||||||
:return: A string joined on the `cls.DELIMITER` to combine transcripts
|
|
||||||
:rtype: str
|
|
||||||
"""
|
|
||||||
return cls.DELIMITER.join(
|
|
||||||
str(transcript) for transcript in transcripts)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def format(cls, transcript_data):
|
|
||||||
"""Any subclass must implement this format class method.
|
|
||||||
|
|
||||||
:param transcript_data: a list of transcripts, 1 or more.
|
|
||||||
:type transcript_data: list[list[dict], list[dict]]
|
|
||||||
:return: A list where each item is an individual transcript
|
|
||||||
as a string.
|
|
||||||
:rtype: list[str]
|
|
||||||
"""
|
|
||||||
raise NotImplementedError(
|
|
||||||
cls.__name__ + '.format'
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class JSONTranscriptFormatter(TranscriptFormatter):
|
|
||||||
"""Formatter for outputting JSON data"""
|
|
||||||
DELIMITER = ','
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def combine(cls, transcripts):
|
|
||||||
return json.dumps(transcripts)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def format(cls, transcript_data):
|
|
||||||
return transcript_data
|
|
||||||
|
|
||||||
|
|
||||||
class TextTranscriptFormatter(TranscriptFormatter):
|
|
||||||
"""Formatter for outputting a Plain Text Format
|
|
||||||
|
|
||||||
Converts the fetched transcript data into separated lines of
|
|
||||||
plain text separated by newline breaks (\n) with no timecodes.
|
|
||||||
"""
|
|
||||||
DELIMITER = '\n\n'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def format(cls, transcript_data):
|
|
||||||
return '{}\n'.format('\n'.join(
|
|
||||||
line['text']for line in transcript_data))
|
|
||||||
|
|
||||||
|
|
||||||
class SRTTranscriptFormatter(TranscriptFormatter):
|
|
||||||
"""Formatter for outputting the SRT Format
|
|
||||||
|
|
||||||
Converts the fetched transcript data into a simple .srt file format.
|
|
||||||
"""
|
|
||||||
DELIMITER = '\n\n'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def format(cls, transcript_data):
|
|
||||||
output = []
|
|
||||||
for frame, item in enumerate(transcript_data, start=1):
|
|
||||||
start_time = float(item.get('start'))
|
|
||||||
duration = float(item.get('duration', '0.0'))
|
|
||||||
|
|
||||||
output.append("{frame}\n".format(frame=frame))
|
|
||||||
output.append("{start_time} --> {end_time}\n".format(
|
|
||||||
start_time=parse_timecode(start_time),
|
|
||||||
end_time=parse_timecode(start_time + duration)
|
|
||||||
))
|
|
||||||
output.append("{text}".format(text=item.get('text')))
|
|
||||||
if frame < len(transcript_data):
|
|
||||||
output.append('\n\n')
|
|
||||||
return '{}\n'.format(''.join(output))
|
|
||||||
|
|
||||||
|
|
||||||
class TranscriptFormatterFactory(object):
|
|
||||||
"""A Transcript Class Factory
|
|
||||||
|
|
||||||
Allows for adding additional custom Transcript classes for the API
|
|
||||||
to use. Custom Transcript classes must inherit from the
|
|
||||||
TranscriptFormatter abstract base class.
|
|
||||||
"""
|
|
||||||
def __init__(self):
|
|
||||||
self._formatters = defaultdict(JSONTranscriptFormatter)
|
|
||||||
|
|
||||||
def add_formatter(self, name, formatter_class):
|
|
||||||
"""Allows for creating additional transcript formatters.
|
|
||||||
|
|
||||||
|
|
||||||
:param name: a name given to the `formatter_class`
|
|
||||||
:type name: str
|
|
||||||
:param formatter_class: a subclass of TranscriptFormatter
|
|
||||||
:type formatter_class: class
|
|
||||||
:rtype None
|
|
||||||
"""
|
|
||||||
if not issubclass(formatter_class, TranscriptFormatter):
|
|
||||||
raise TypeError((
|
|
||||||
'{0} must be a subclass of TranscriptFormatter'
|
|
||||||
).format(formatter_class)
|
|
||||||
)
|
|
||||||
self._formatters.update({name: formatter_class})
|
|
||||||
|
|
||||||
def add_formatters(self, formatters_dict):
|
|
||||||
"""Allow creation of multiple transcript formatters at a time.
|
|
||||||
|
|
||||||
:param formatters_dict: key(s) are the string name to be given
|
|
||||||
to the formatter class, value for each key should be a subclass
|
|
||||||
of TranscriptFormatter.
|
|
||||||
:type formatters_dict: dict
|
|
||||||
:rtype None
|
|
||||||
"""
|
|
||||||
for name, formatter_class in formatters_dict.items():
|
|
||||||
self.add_formatter(name, formatter_class)
|
|
||||||
|
|
||||||
def get_formatter(self, name):
|
|
||||||
"""Retrieve a formatter class by its assigned name.
|
|
||||||
|
|
||||||
:param name: the string name given to the formatter class.
|
|
||||||
:type name: str
|
|
||||||
:return: a subclass of `TranscriptFormatter`
|
|
||||||
"""
|
|
||||||
return self._formatters[name]
|
|
||||||
|
|
||||||
|
|
||||||
formats = TranscriptFormatterFactory()
|
|
||||||
formats.add_formatters({
|
|
||||||
'json': JSONTranscriptFormatter,
|
|
||||||
'srt': SRTTranscriptFormatter,
|
|
||||||
'text': TextTranscriptFormatter
|
|
||||||
})
|
|
|
@ -1,6 +1,6 @@
|
||||||
from unittest import TestCase
|
from unittest import TestCase
|
||||||
from mock import patch
|
from mock import patch
|
||||||
import json
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
@ -21,10 +21,7 @@ from youtube_transcript_api import (
|
||||||
|
|
||||||
|
|
||||||
def load_asset(filename):
|
def load_asset(filename):
|
||||||
filepath = '{dirname}/assets/{filename}'.format(
|
with open('{dirname}/assets/{filename}'.format(dirname=os.path.dirname(__file__), filename=filename)) as file:
|
||||||
dirname=os.path.dirname(__file__), filename=filename)
|
|
||||||
|
|
||||||
with open(filepath, 'r', encoding='utf-8') as file:
|
|
||||||
return file.read()
|
return file.read()
|
||||||
|
|
||||||
|
|
||||||
|
@ -161,7 +158,7 @@ class TestYouTubeTranscriptApi(TestCase):
|
||||||
def test_get_transcript__with_proxy(self):
|
def test_get_transcript__with_proxy(self):
|
||||||
proxies = {'http': '', 'https:': ''}
|
proxies = {'http': '', 'https:': ''}
|
||||||
transcript = YouTubeTranscriptApi.get_transcript(
|
transcript = YouTubeTranscriptApi.get_transcript(
|
||||||
'GJLlxj_dtq8', proxies=proxies, format=None
|
'GJLlxj_dtq8', proxies=proxies
|
||||||
)
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
transcript,
|
transcript,
|
||||||
|
@ -194,8 +191,8 @@ class TestYouTubeTranscriptApi(TestCase):
|
||||||
|
|
||||||
YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages)
|
YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages)
|
||||||
|
|
||||||
mock_get_transcript.assert_any_call(video_id_1, languages, None, None, format=None)
|
mock_get_transcript.assert_any_call(video_id_1, languages, None, None)
|
||||||
mock_get_transcript.assert_any_call(video_id_2, languages, None, None, format=None)
|
mock_get_transcript.assert_any_call(video_id_2, languages, None, None)
|
||||||
self.assertEqual(mock_get_transcript.call_count, 2)
|
self.assertEqual(mock_get_transcript.call_count, 2)
|
||||||
|
|
||||||
@patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript', side_effect=Exception('Error'))
|
@patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript', side_effect=Exception('Error'))
|
||||||
|
@ -210,20 +207,20 @@ class TestYouTubeTranscriptApi(TestCase):
|
||||||
|
|
||||||
YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True)
|
YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True)
|
||||||
|
|
||||||
mock_get_transcript.assert_any_call(video_id_1, ('en',), None, None, format=None)
|
mock_get_transcript.assert_any_call(video_id_1, ('en',), None, None)
|
||||||
mock_get_transcript.assert_any_call(video_id_2, ('en',), None, None, format=None)
|
mock_get_transcript.assert_any_call(video_id_2, ('en',), None, None)
|
||||||
|
|
||||||
@patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript')
|
@patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript')
|
||||||
def test_get_transcripts__with_cookies(self, mock_get_transcript):
|
def test_get_transcripts__with_cookies(self, mock_get_transcript):
|
||||||
cookies = '/example_cookies.txt'
|
cookies = '/example_cookies.txt'
|
||||||
YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], cookies=cookies)
|
YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], cookies=cookies)
|
||||||
mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), None, cookies, format=None)
|
mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), None, cookies)
|
||||||
|
|
||||||
@patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript')
|
@patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript')
|
||||||
def test_get_transcripts__with_proxies(self, mock_get_transcript):
|
def test_get_transcripts__with_proxies(self, mock_get_transcript):
|
||||||
proxies = {'http': '', 'https:': ''}
|
proxies = {'http': '', 'https:': ''}
|
||||||
YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies)
|
YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies)
|
||||||
mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None, format=None)
|
mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None)
|
||||||
|
|
||||||
def test_load_cookies(self):
|
def test_load_cookies(self):
|
||||||
dirname, filename = os.path.split(os.path.abspath(__file__))
|
dirname, filename = os.path.split(os.path.abspath(__file__))
|
||||||
|
|
|
@ -25,50 +25,50 @@ class TestYouTubeTranscriptCli(TestCase):
|
||||||
YouTubeTranscriptApi.list_transcripts = MagicMock(return_value=self.transcript_list_mock)
|
YouTubeTranscriptApi.list_transcripts = MagicMock(return_value=self.transcript_list_mock)
|
||||||
|
|
||||||
def test_argument_parsing(self):
|
def test_argument_parsing(self):
|
||||||
parsed_args = YouTubeTranscriptCli('v1 v2 --format json --languages de en'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli('v1 v2 --json --languages de en'.split())._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||||
self.assertEqual(parsed_args.format, 'json')
|
self.assertEqual(parsed_args.json, True)
|
||||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
||||||
self.assertEqual(parsed_args.http_proxy, '')
|
self.assertEqual(parsed_args.http_proxy, '')
|
||||||
self.assertEqual(parsed_args.https_proxy, '')
|
self.assertEqual(parsed_args.https_proxy, '')
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en --format json'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en --json'.split())._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||||
self.assertEqual(parsed_args.format, 'json')
|
self.assertEqual(parsed_args.json, True)
|
||||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
||||||
self.assertEqual(parsed_args.http_proxy, '')
|
self.assertEqual(parsed_args.http_proxy, '')
|
||||||
self.assertEqual(parsed_args.https_proxy, '')
|
self.assertEqual(parsed_args.https_proxy, '')
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli(' --format json v1 v2 --languages de en'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli(' --json v1 v2 --languages de en'.split())._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||||
self.assertEqual(parsed_args.format, 'json')
|
self.assertEqual(parsed_args.json, True)
|
||||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
||||||
self.assertEqual(parsed_args.http_proxy, '')
|
self.assertEqual(parsed_args.http_proxy, '')
|
||||||
self.assertEqual(parsed_args.https_proxy, '')
|
self.assertEqual(parsed_args.https_proxy, '')
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli(
|
parsed_args = YouTubeTranscriptCli(
|
||||||
'v1 v2 --languages de en --format json --http-proxy http://user:pass@domain:port --https-proxy https://user:pass@domain:port'.split()
|
'v1 v2 --languages de en --json --http-proxy http://user:pass@domain:port --https-proxy https://user:pass@domain:port'.split()
|
||||||
)._parse_args()
|
)._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||||
self.assertEqual(parsed_args.format, 'json')
|
self.assertEqual(parsed_args.json, True)
|
||||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
||||||
self.assertEqual(parsed_args.http_proxy, 'http://user:pass@domain:port')
|
self.assertEqual(parsed_args.http_proxy, 'http://user:pass@domain:port')
|
||||||
self.assertEqual(parsed_args.https_proxy, 'https://user:pass@domain:port')
|
self.assertEqual(parsed_args.https_proxy, 'https://user:pass@domain:port')
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli(
|
parsed_args = YouTubeTranscriptCli(
|
||||||
'v1 v2 --languages de en --format json --http-proxy http://user:pass@domain:port'.split()
|
'v1 v2 --languages de en --json --http-proxy http://user:pass@domain:port'.split()
|
||||||
)._parse_args()
|
)._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||||
self.assertEqual(parsed_args.format, 'json')
|
self.assertEqual(parsed_args.json, True)
|
||||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
||||||
self.assertEqual(parsed_args.http_proxy, 'http://user:pass@domain:port')
|
self.assertEqual(parsed_args.http_proxy, 'http://user:pass@domain:port')
|
||||||
self.assertEqual(parsed_args.https_proxy, '')
|
self.assertEqual(parsed_args.https_proxy, '')
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli(
|
parsed_args = YouTubeTranscriptCli(
|
||||||
'v1 v2 --languages de en --format json --https-proxy https://user:pass@domain:port'.split()
|
'v1 v2 --languages de en --json --https-proxy https://user:pass@domain:port'.split()
|
||||||
)._parse_args()
|
)._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||||
self.assertEqual(parsed_args.format, 'json')
|
self.assertEqual(parsed_args.json, True)
|
||||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
||||||
self.assertEqual(parsed_args.https_proxy, 'https://user:pass@domain:port')
|
self.assertEqual(parsed_args.https_proxy, 'https://user:pass@domain:port')
|
||||||
self.assertEqual(parsed_args.http_proxy, '')
|
self.assertEqual(parsed_args.http_proxy, '')
|
||||||
|
@ -76,28 +76,28 @@ class TestYouTubeTranscriptCli(TestCase):
|
||||||
def test_argument_parsing__only_video_ids(self):
|
def test_argument_parsing__only_video_ids(self):
|
||||||
parsed_args = YouTubeTranscriptCli('v1 v2'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli('v1 v2'.split())._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||||
self.assertEqual(parsed_args.format, None)
|
self.assertEqual(parsed_args.json, False)
|
||||||
self.assertEqual(parsed_args.languages, ['en'])
|
self.assertEqual(parsed_args.languages, ['en'])
|
||||||
|
|
||||||
def test_argument_parsing__fail_without_video_ids(self):
|
def test_argument_parsing__fail_without_video_ids(self):
|
||||||
with self.assertRaises(SystemExit):
|
with self.assertRaises(SystemExit):
|
||||||
YouTubeTranscriptCli('--format json'.split())._parse_args()
|
YouTubeTranscriptCli('--json'.split())._parse_args()
|
||||||
|
|
||||||
def test_argument_parsing__json(self):
|
def test_argument_parsing__json(self):
|
||||||
parsed_args = YouTubeTranscriptCli('v1 v2 --format json'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli('v1 v2 --json'.split())._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||||
self.assertEqual(parsed_args.format, 'json')
|
self.assertEqual(parsed_args.json, True)
|
||||||
self.assertEqual(parsed_args.languages, ['en'])
|
self.assertEqual(parsed_args.languages, ['en'])
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli('--format json v1 v2'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli('--json v1 v2'.split())._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||||
self.assertEqual(parsed_args.format, 'json')
|
self.assertEqual(parsed_args.json, True)
|
||||||
self.assertEqual(parsed_args.languages, ['en'])
|
self.assertEqual(parsed_args.languages, ['en'])
|
||||||
|
|
||||||
def test_argument_parsing__languages(self):
|
def test_argument_parsing__languages(self):
|
||||||
parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en'.split())._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||||
self.assertEqual(parsed_args.format, None)
|
self.assertEqual(parsed_args.json, False)
|
||||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
||||||
|
|
||||||
def test_argument_parsing__proxies(self):
|
def test_argument_parsing__proxies(self):
|
||||||
|
@ -135,13 +135,13 @@ class TestYouTubeTranscriptCli(TestCase):
|
||||||
def test_argument_parsing__translate(self):
|
def test_argument_parsing__translate(self):
|
||||||
parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en --translate cz'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en --translate cz'.split())._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||||
self.assertEqual(parsed_args.format, None)
|
self.assertEqual(parsed_args.json, False)
|
||||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
||||||
self.assertEqual(parsed_args.translate, 'cz')
|
self.assertEqual(parsed_args.translate, 'cz')
|
||||||
|
|
||||||
parsed_args = YouTubeTranscriptCli('v1 v2 --translate cz --languages de en'.split())._parse_args()
|
parsed_args = YouTubeTranscriptCli('v1 v2 --translate cz --languages de en'.split())._parse_args()
|
||||||
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
|
||||||
self.assertEqual(parsed_args.format, None)
|
self.assertEqual(parsed_args.json, False)
|
||||||
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
self.assertEqual(parsed_args.languages, ['de', 'en'])
|
||||||
self.assertEqual(parsed_args.translate, 'cz')
|
self.assertEqual(parsed_args.translate, 'cz')
|
||||||
|
|
||||||
|
@ -204,7 +204,8 @@ class TestYouTubeTranscriptCli(TestCase):
|
||||||
YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None)
|
YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None)
|
||||||
|
|
||||||
def test_run__json_output(self):
|
def test_run__json_output(self):
|
||||||
output = YouTubeTranscriptCli('v1 v2 --languages de en --format json'.split()).run()
|
output = YouTubeTranscriptCli('v1 v2 --languages de en --json'.split()).run()
|
||||||
|
|
||||||
# will fail if output is not valid json
|
# will fail if output is not valid json
|
||||||
json.loads(output)
|
json.loads(output)
|
||||||
|
|
||||||
|
|
|
@ -1,99 +0,0 @@
|
||||||
from unittest import TestCase
|
|
||||||
from mock import MagicMock
|
|
||||||
import json
|
|
||||||
|
|
||||||
from youtube_transcript_api.formatters import (
|
|
||||||
JSONTranscriptFormatter,
|
|
||||||
parse_timecode,
|
|
||||||
SRTTranscriptFormatter,
|
|
||||||
TextTranscriptFormatter,
|
|
||||||
TranscriptFormatter,
|
|
||||||
TranscriptFormatterFactory
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TestTranscriptFormatters(TestCase):
|
|
||||||
@classmethod
|
|
||||||
def setUpClass(cls):
|
|
||||||
cls.transcript = [
|
|
||||||
{
|
|
||||||
'text': 'Hey, this is just a test',
|
|
||||||
'start': 0.0,
|
|
||||||
'duration': 1.54
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'text': 'this is not the original transcript',
|
|
||||||
'start': 1.54,
|
|
||||||
'duration': 4.16
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'text': 'just something shorter, I made up for testing',
|
|
||||||
'start': 5.7,
|
|
||||||
'duration': 3.239
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
def test_base_formatter_combine(self):
|
|
||||||
expecting = ''.join([str(line) for line in self.transcript])
|
|
||||||
|
|
||||||
self.assertEqual(
|
|
||||||
TranscriptFormatter.combine(self.transcript),
|
|
||||||
expecting
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_base_format_not_implemented(self):
|
|
||||||
with self.assertRaises(NotImplementedError):
|
|
||||||
TranscriptFormatter.format(self.transcript)
|
|
||||||
|
|
||||||
def test_text_formatter_format(self):
|
|
||||||
text = '\n'.join([line.get('text') for line in self.transcript])
|
|
||||||
text_fmt = TextTranscriptFormatter.format(self.transcript)
|
|
||||||
self.assertIn(text + '\n', text_fmt)
|
|
||||||
|
|
||||||
def test_srt_formatter_format(self):
|
|
||||||
start = self.transcript[0].get('start')
|
|
||||||
duration = self.transcript[0].get('duration')
|
|
||||||
srt_fmt = SRTTranscriptFormatter.format(self.transcript)
|
|
||||||
self.assertIn('{start} --> {end}'.format(
|
|
||||||
start=parse_timecode(start),
|
|
||||||
end=parse_timecode(start+duration)
|
|
||||||
), srt_fmt)
|
|
||||||
|
|
||||||
def test_json_formatter_format(self):
|
|
||||||
json_fmt = JSONTranscriptFormatter.format(self.transcript)
|
|
||||||
self.assertIsInstance(json.dumps(json_fmt), str)
|
|
||||||
|
|
||||||
def test_invalid_parse_timecode(self):
|
|
||||||
start_time = 'not_float'
|
|
||||||
|
|
||||||
with self.assertRaises(ValueError):
|
|
||||||
parse_timecode(start_time)
|
|
||||||
|
|
||||||
def test_valid_parse_timecode(self):
|
|
||||||
start_time = 0.0
|
|
||||||
end_time = 5.20
|
|
||||||
|
|
||||||
self.assertEqual(
|
|
||||||
parse_timecode(start_time),
|
|
||||||
'00:00:00,000'
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertEqual(
|
|
||||||
parse_timecode(end_time),
|
|
||||||
'00:00:05,200'
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_formatter_factory_valid_single_add(self):
|
|
||||||
factory = TranscriptFormatterFactory()
|
|
||||||
factory.add_formatter('json', JSONTranscriptFormatter)
|
|
||||||
|
|
||||||
self.assertDictEqual(
|
|
||||||
getattr(factory, '_formatters'),
|
|
||||||
{'json': JSONTranscriptFormatter}
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_formatter_factory_invalid_single_add(self):
|
|
||||||
factory = TranscriptFormatterFactory()
|
|
||||||
|
|
||||||
with self.assertRaises(TypeError):
|
|
||||||
factory.add_formatter('magic', MagicMock)
|
|
Loading…
Reference in New Issue