diff --git a/youtube_transcript_api/_api.py b/youtube_transcript_api/_api.py index dd91cfd..ca2a000 100644 --- a/youtube_transcript_api/_api.py +++ b/youtube_transcript_api/_api.py @@ -52,15 +52,22 @@ class YouTubeTranscriptApi(): :type video_id: str :param proxies: a dictionary mapping of http and https proxies to be used for the network requests :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies + :param cookies: a string of the path to a text file containing youtube authorization cookies + :type cookies: str - cookies.txt :return: the list of available transcripts :rtype TranscriptList: """ - print(cookies) with requests.Session() as http_client: if cookies: - cj = cookiejar.MozillaCookieJar() - cj.load(cookies) - http_client.cookies = cj + try: + cj = cookiejar.MozillaCookieJar() + cj.load(cookies) + http_client.cookies = cj + except IOError as e: + print("Warning: Path for cookies file was not valid. Did not load any cookies") + except FileNotFoundError as e: + print("Warning: Path for cookies file was not valid. Did not load any cookies") + http_client.proxies = proxies if proxies else {} return TranscriptListFetcher(http_client).fetch(video_id) @@ -80,6 +87,8 @@ class YouTubeTranscriptApi(): :type continue_after_error: bool :param proxies: a dictionary mapping of http and https proxies to be used for the network requests :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies + :param cookies: a string of the path to a text file containing youtube authorization cookies + :type cookies: str - cookies.txt :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of video ids, which could not be retrieved :rtype ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}): @@ -113,6 +122,8 @@ class YouTubeTranscriptApi(): :type languages: list[str] :param proxies: a dictionary mapping of http and https proxies to be used for the network requests :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies + :param cookies: a string of the path to a text file containing youtube authorization cookies + :type cookies: str - cookies.txt :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys :rtype [{'text': str, 'start': float, 'end': float}]: """ diff --git a/youtube_transcript_api/_cli.py b/youtube_transcript_api/_cli.py index 043bf19..405d6e1 100644 --- a/youtube_transcript_api/_cli.py +++ b/youtube_transcript_api/_cli.py @@ -21,12 +21,14 @@ class YouTubeTranscriptCli(): if parsed_args.http_proxy != '' or parsed_args.https_proxy != '': proxies = {"http": parsed_args.http_proxy, "https": parsed_args.https_proxy} + cookies = parsed_args.cookies + transcripts = [] exceptions = [] for video_id in parsed_args.video_ids: try: - transcripts.append(self._fetch_transcript(parsed_args, proxies, video_id)) + transcripts.append(self._fetch_transcript(parsed_args, proxies, cookies, video_id)) except Exception as exception: exceptions.append(exception) @@ -35,8 +37,8 @@ class YouTubeTranscriptCli(): + ([json.dumps(transcripts) if parsed_args.json else pprint.pformat(transcripts)] if transcripts else []) ) - def _fetch_transcript(self, parsed_args, proxies, video_id): - transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies) + def _fetch_transcript(self, parsed_args, proxies, cookies, video_id): + transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies, cookies=cookies) if parsed_args.list_transcripts: return str(transcript_list) @@ -123,5 +125,10 @@ class YouTubeTranscriptCli(): metavar='URL', help='Use the specified HTTPS proxy.' ) - + parser.add_argument( + '--cookies', + default=None, + help='The cookie file that will be used for authorization with youtube.' + ) + return parser.parse_args(self._args) diff --git a/youtube_transcript_api/test/test_api.py b/youtube_transcript_api/test/test_api.py index f506d33..e13e7ac 100644 --- a/youtube_transcript_api/test/test_api.py +++ b/youtube_transcript_api/test/test_api.py @@ -159,8 +159,8 @@ class TestYouTubeTranscriptApi(TestCase): YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages) - YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, languages, None) - YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, languages, None) + YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, languages, None, None) + YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, languages, None, None) self.assertEqual(YouTubeTranscriptApi.get_transcript.call_count, 2) def test_get_transcripts__stop_on_error(self): @@ -176,15 +176,21 @@ class TestYouTubeTranscriptApi(TestCase): YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True) - YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, ('en',), None) - YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, ('en',), None) + YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_1, ('en',), None, None) + YouTubeTranscriptApi.get_transcript.assert_any_call(video_id_2, ('en',), None, None) + + def test_get_transcripts__check_cookies(self): + cookies='example_cookies.txt' + YouTubeTranscriptApi.get_transcript = MagicMock() + YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], cookies=cookies) + YouTubeTranscriptApi.get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), None, cookies) + def test_get_transcript__with_proxies(self): proxies = {'http': '', 'https:': ''} transcript = YouTubeTranscriptApi.get_transcript( 'GJLlxj_dtq8', proxies=proxies ) - self.assertEqual( transcript, [ @@ -195,4 +201,4 @@ class TestYouTubeTranscriptApi(TestCase): ) YouTubeTranscriptApi.get_transcript = MagicMock() YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies) - YouTubeTranscriptApi.get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies) + YouTubeTranscriptApi.get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None) diff --git a/youtube_transcript_api/test/test_cli.py b/youtube_transcript_api/test/test_cli.py index d2676d8..158cd35 100644 --- a/youtube_transcript_api/test/test_cli.py +++ b/youtube_transcript_api/test/test_cli.py @@ -164,8 +164,8 @@ class TestYouTubeTranscriptCli(TestCase): def test_run(self): YouTubeTranscriptCli('v1 v2 --languages de en'.split()).run() - YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None) - YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None) + YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies=None) + YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None) self.transcript_list_mock.find_transcript.assert_any_call(['de', 'en']) @@ -200,8 +200,8 @@ class TestYouTubeTranscriptCli(TestCase): def test_run__list_transcripts(self): YouTubeTranscriptCli('--list-transcripts v1 v2'.split()).run() - YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None) - YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None) + YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies=None) + YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None) def test_run__json_output(self): output = YouTubeTranscriptCli('v1 v2 --languages de en --json'.split()).run() @@ -220,10 +220,23 @@ class TestYouTubeTranscriptCli(TestCase): YouTubeTranscriptApi.list_transcripts.assert_any_call( 'v1', - proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'} + proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'}, + cookies= None ) YouTubeTranscriptApi.list_transcripts.assert_any_call( 'v2', - proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'} + proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'}, + cookies=None ) + + def test_run__cookies(self): + YouTubeTranscriptCli( + ( + 'v1 v2 --languages de en ' + '--cookies blahblah.txt' + ).split() + ).run() + YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies='blahblah.txt') + YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies='blahblah.txt') +