Added CLI support for the --cookies option, fixed testing

This commit is contained in:
danielcliu 2020-01-20 23:04:46 -08:00
parent dc9fc2ee93
commit f9e553ebaf
4 changed files with 57 additions and 20 deletions

View File

@ -52,15 +52,22 @@ class YouTubeTranscriptApi():
:type video_id: str :type video_id: str
:param proxies: a dictionary mapping of http and https proxies to be used for the network requests :param proxies: a dictionary mapping of http and https proxies to be used for the network requests
:type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
:param cookies: a string of the path to a text file containing youtube authorization cookies
:type cookies: str - cookies.txt
:return: the list of available transcripts :return: the list of available transcripts
:rtype TranscriptList: :rtype TranscriptList:
""" """
print(cookies)
with requests.Session() as http_client: with requests.Session() as http_client:
if cookies: if cookies:
try:
cj = cookiejar.MozillaCookieJar() cj = cookiejar.MozillaCookieJar()
cj.load(cookies) cj.load(cookies)
http_client.cookies = cj http_client.cookies = cj
except IOError as e:
print("Warning: Path for cookies file was not valid. Did not load any cookies")
except FileNotFoundError as e:
print("Warning: Path for cookies file was not valid. Did not load any cookies")
http_client.proxies = proxies if proxies else {} http_client.proxies = proxies if proxies else {}
return TranscriptListFetcher(http_client).fetch(video_id) return TranscriptListFetcher(http_client).fetch(video_id)
@ -80,6 +87,8 @@ class YouTubeTranscriptApi():
:type continue_after_error: bool :type continue_after_error: bool
:param proxies: a dictionary mapping of http and https proxies to be used for the network requests :param proxies: a dictionary mapping of http and https proxies to be used for the network requests
:type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
:param cookies: a string of the path to a text file containing youtube authorization cookies
:type cookies: str - cookies.txt
:return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of
video ids, which could not be retrieved video ids, which could not be retrieved
:rtype ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}): :rtype ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}):
@ -113,6 +122,8 @@ class YouTubeTranscriptApi():
:type languages: list[str] :type languages: list[str]
:param proxies: a dictionary mapping of http and https proxies to be used for the network requests :param proxies: a dictionary mapping of http and https proxies to be used for the network requests
:type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
:param cookies: a string of the path to a text file containing youtube authorization cookies
:type cookies: str - cookies.txt
:return: a list of dictionaries containing the 'text', 'start' and 'duration' keys :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
:rtype [{'text': str, 'start': float, 'end': float}]: :rtype [{'text': str, 'start': float, 'end': float}]:
""" """

View File

@ -21,12 +21,14 @@ class YouTubeTranscriptCli():
if parsed_args.http_proxy != '' or parsed_args.https_proxy != '': if parsed_args.http_proxy != '' or parsed_args.https_proxy != '':
proxies = {"http": parsed_args.http_proxy, "https": parsed_args.https_proxy} proxies = {"http": parsed_args.http_proxy, "https": parsed_args.https_proxy}
cookies = parsed_args.cookies
transcripts = [] transcripts = []
exceptions = [] exceptions = []
for video_id in parsed_args.video_ids: for video_id in parsed_args.video_ids:
try: try:
transcripts.append(self._fetch_transcript(parsed_args, proxies, video_id)) transcripts.append(self._fetch_transcript(parsed_args, proxies, cookies, video_id))
except Exception as exception: except Exception as exception:
exceptions.append(exception) exceptions.append(exception)
@ -35,8 +37,8 @@ class YouTubeTranscriptCli():
+ ([json.dumps(transcripts) if parsed_args.json else pprint.pformat(transcripts)] if transcripts else []) + ([json.dumps(transcripts) if parsed_args.json else pprint.pformat(transcripts)] if transcripts else [])
) )
def _fetch_transcript(self, parsed_args, proxies, video_id): def _fetch_transcript(self, parsed_args, proxies, cookies, video_id):
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies) transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies, cookies=cookies)
if parsed_args.list_transcripts: if parsed_args.list_transcripts:
return str(transcript_list) return str(transcript_list)
@ -123,5 +125,10 @@ class YouTubeTranscriptCli():
metavar='URL', metavar='URL',
help='Use the specified HTTPS proxy.' help='Use the specified HTTPS proxy.'
) )
parser.add_argument(
'--cookies',
default=None,
help='The cookie file that will be used for authorization with youtube.'
)
return parser.parse_args(self._args) return parser.parse_args(self._args)

View File

@ -159,8 +159,8 @@ class TestYouTubeTranscriptApi(TestCase):
YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages) YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages)
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, languages, None) YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, languages, None, None)
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, languages, None) YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, languages, None, None)
self.assertEqual(YouTubeTranscriptApi.get_transcript.call_count, 2) self.assertEqual(YouTubeTranscriptApi.get_transcript.call_count, 2)
def test_get_transcripts__stop_on_error(self): def test_get_transcripts__stop_on_error(self):
@ -176,15 +176,21 @@ class TestYouTubeTranscriptApi(TestCase):
YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True) YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True)
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, ('en',), None) YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, ('en',), None, None)
YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, ('en',), None) YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, ('en',), None, None)
def test_get_transcripts__check_cookies(self):
cookies='example_cookies.txt'
YouTubeTranscriptApi.get_transcript = MagicMock()
YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], cookies=cookies)
YouTubeTranscriptApi.get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), None, cookies)
def test_get_transcript__with_proxies(self): def test_get_transcript__with_proxies(self):
proxies = {'http': '', 'https:': ''} proxies = {'http': '', 'https:': ''}
transcript = YouTubeTranscriptApi.get_transcript( transcript = YouTubeTranscriptApi.get_transcript(
'GJLlxj_dtq8', proxies=proxies 'GJLlxj_dtq8', proxies=proxies
) )
self.assertEqual( self.assertEqual(
transcript, transcript,
[ [
@ -195,4 +201,4 @@ class TestYouTubeTranscriptApi(TestCase):
) )
YouTubeTranscriptApi.get_transcript = MagicMock() YouTubeTranscriptApi.get_transcript = MagicMock()
YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies) YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies)
YouTubeTranscriptApi.get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies) YouTubeTranscriptApi.get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None)

View File

@ -164,8 +164,8 @@ class TestYouTubeTranscriptCli(TestCase):
def test_run(self): def test_run(self):
YouTubeTranscriptCli('v1 v2 --languages de en'.split()).run() YouTubeTranscriptCli('v1 v2 --languages de en'.split()).run()
YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None) YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies=None)
YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None) YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None)
self.transcript_list_mock.find_transcript.assert_any_call(['de', 'en']) self.transcript_list_mock.find_transcript.assert_any_call(['de', 'en'])
@ -200,8 +200,8 @@ class TestYouTubeTranscriptCli(TestCase):
def test_run__list_transcripts(self): def test_run__list_transcripts(self):
YouTubeTranscriptCli('--list-transcripts v1 v2'.split()).run() YouTubeTranscriptCli('--list-transcripts v1 v2'.split()).run()
YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None) YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies=None)
YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None) YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None)
def test_run__json_output(self): def test_run__json_output(self):
output = YouTubeTranscriptCli('v1 v2 --languages de en --json'.split()).run() output = YouTubeTranscriptCli('v1 v2 --languages de en --json'.split()).run()
@ -220,10 +220,23 @@ class TestYouTubeTranscriptCli(TestCase):
YouTubeTranscriptApi.list_transcripts.assert_any_call( YouTubeTranscriptApi.list_transcripts.assert_any_call(
'v1', 'v1',
proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'} proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'},
cookies= None
) )
YouTubeTranscriptApi.list_transcripts.assert_any_call( YouTubeTranscriptApi.list_transcripts.assert_any_call(
'v2', 'v2',
proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'} proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'},
cookies=None
) )
def test_run__cookies(self):
YouTubeTranscriptCli(
(
'v1 v2 --languages de en '
'--cookies blahblah.txt'
).split()
).run()
YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies='blahblah.txt')
YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies='blahblah.txt')