Added CLI support for the --cookies option, fixed tests

This commit is contained in:
danielcliu 2020-01-20 23:04:46 -08:00
parent dc9fc2ee93
commit f9e553ebaf
4 changed files with 57 additions and 20 deletions

View File

@ -52,15 +52,22 @@ class YouTubeTranscriptApi():
:type video_id: str
:param proxies: a dictionary mapping of http and https proxies to be used for the network requests
:type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
:param cookies: a string of the path to a text file containing youtube authorization cookies
:type cookies: str - cookies.txt
:return: the list of available transcripts
:rtype TranscriptList:
"""
print(cookies)
with requests.Session() as http_client:
if cookies:
try:
cj = cookiejar.MozillaCookieJar()
cj.load(cookies)
http_client.cookies = cj
except IOError as e:
print("Warning: Path for cookies file was not valid. Did not load any cookies")
except FileNotFoundError as e:
print("Warning: Path for cookies file was not valid. Did not load any cookies")
http_client.proxies = proxies if proxies else {}
return TranscriptListFetcher(http_client).fetch(video_id)
@ -80,6 +87,8 @@ class YouTubeTranscriptApi():
:type continue_after_error: bool
:param proxies: a dictionary mapping of http and https proxies to be used for the network requests
:type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
:param cookies: a string of the path to a text file containing youtube authorization cookies
:type cookies: str - cookies.txt
:return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of
video ids, which could not be retrieved
:rtype ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}):
@ -113,6 +122,8 @@ class YouTubeTranscriptApi():
:type languages: list[str]
:param proxies: a dictionary mapping of http and https proxies to be used for the network requests
:type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
:param cookies: a string of the path to a text file containing youtube authorization cookies
:type cookies: str - cookies.txt
:return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
:rtype [{'text': str, 'start': float, 'end': float}]:
"""

View File

@ -21,12 +21,14 @@ class YouTubeTranscriptCli():
if parsed_args.http_proxy != '' or parsed_args.https_proxy != '':
proxies = {"http": parsed_args.http_proxy, "https": parsed_args.https_proxy}
cookies = parsed_args.cookies
transcripts = []
exceptions = []
for video_id in parsed_args.video_ids:
try:
transcripts.append(self._fetch_transcript(parsed_args, proxies, video_id))
transcripts.append(self._fetch_transcript(parsed_args, proxies, cookies, video_id))
except Exception as exception:
exceptions.append(exception)
@ -35,8 +37,8 @@ class YouTubeTranscriptCli():
+ ([json.dumps(transcripts) if parsed_args.json else pprint.pformat(transcripts)] if transcripts else [])
)
def _fetch_transcript(self, parsed_args, proxies, video_id):
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies)
def _fetch_transcript(self, parsed_args, proxies, cookies, video_id):
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies, cookies=cookies)
if parsed_args.list_transcripts:
return str(transcript_list)
@ -123,5 +125,10 @@ class YouTubeTranscriptCli():
metavar='URL',
help='Use the specified HTTPS proxy.'
)
parser.add_argument(
'--cookies',
default=None,
help='The cookie file that will be used for authorization with youtube.'
)
return parser.parse_args(self._args)

View File

@ -159,8 +159,8 @@ class TestYouTubeTranscriptApi(TestCase):
YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages)
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, languages, None)
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, languages, None)
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, languages, None, None)
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, languages, None, None)
self.assertEqual(YouTubeTranscriptApi.get_transcript.call_count, 2)
def test_get_transcripts__stop_on_error(self):
@ -176,15 +176,21 @@ class TestYouTubeTranscriptApi(TestCase):
YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True)
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, ('en',), None)
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, ('en',), None)
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, ('en',), None, None)
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, ('en',), None, None)
def test_get_transcripts__check_cookies(self):
    """get_transcripts should pass the cookies file path on to get_transcript.

    get_transcript is replaced with a MagicMock, so no network request is
    made; only the forwarded positional arguments are inspected.
    """
    cookie_path = 'example_cookies.txt'
    YouTubeTranscriptApi.get_transcript = MagicMock()

    YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], cookies=cookie_path)

    # Expected positional order: video id, languages, proxies, cookies.
    YouTubeTranscriptApi.get_transcript.assert_any_call(
        'GJLlxj_dtq8', ('en',), None, cookie_path
    )
def test_get_transcript__with_proxies(self):
proxies = {'http': '', 'https:': ''}
transcript = YouTubeTranscriptApi.get_transcript(
'GJLlxj_dtq8', proxies=proxies
)
self.assertEqual(
transcript,
[
@ -195,4 +201,4 @@ class TestYouTubeTranscriptApi(TestCase):
)
YouTubeTranscriptApi.get_transcript = MagicMock()
YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies)
YouTubeTranscriptApi.get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies)
YouTubeTranscriptApi.get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None)

View File

@ -164,8 +164,8 @@ class TestYouTubeTranscriptCli(TestCase):
def test_run(self):
YouTubeTranscriptCli('v1 v2 --languages de en'.split()).run()
YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None)
YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None)
YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies=None)
YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None)
self.transcript_list_mock.find_transcript.assert_any_call(['de', 'en'])
@ -200,8 +200,8 @@ class TestYouTubeTranscriptCli(TestCase):
def test_run__list_transcripts(self):
YouTubeTranscriptCli('--list-transcripts v1 v2'.split()).run()
YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None)
YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None)
YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies=None)
YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None)
def test_run__json_output(self):
output = YouTubeTranscriptCli('v1 v2 --languages de en --json'.split()).run()
@ -220,10 +220,23 @@ class TestYouTubeTranscriptCli(TestCase):
YouTubeTranscriptApi.list_transcripts.assert_any_call(
'v1',
proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'}
proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'},
cookies= None
)
YouTubeTranscriptApi.list_transcripts.assert_any_call(
'v2',
proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'}
proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'},
cookies=None
)
def test_run__cookies(self):
    """The --cookies CLI argument should reach list_transcripts for every video id."""
    cli_args = (
        'v1 v2 --languages de en '
        '--cookies blahblah.txt'
    ).split()
    YouTubeTranscriptCli(cli_args).run()

    # Both requested videos must be looked up with the same cookies path.
    for video_id in ('v1', 'v2'):
        YouTubeTranscriptApi.list_transcripts.assert_any_call(
            video_id, proxies=None, cookies='blahblah.txt'
        )