diff --git a/youtube_transcript_api/__init__.py b/youtube_transcript_api/__init__.py
index 1fe0f73..baefd02 100644
--- a/youtube_transcript_api/__init__.py
+++ b/youtube_transcript_api/__init__.py
@@ -5,6 +5,7 @@ from ._errors import (
     NoTranscriptFound,
     CouldNotRetrieveTranscript,
     VideoUnavailable,
+    TooManyRequests,
     NotTranslatable,
     TranslationLanguageNotAvailable,
     NoTranscriptAvailable,
diff --git a/youtube_transcript_api/_errors.py b/youtube_transcript_api/_errors.py
index 2f83a16..f7a5658 100644
--- a/youtube_transcript_api/_errors.py
+++ b/youtube_transcript_api/_errors.py
@@ -37,7 +37,13 @@ class CouldNotRetrieveTranscript(Exception):
 
 class VideoUnavailable(CouldNotRetrieveTranscript):
     CAUSE_MESSAGE = 'The video is no longer available'
 
 
+class TooManyRequests(CouldNotRetrieveTranscript):
+    """Raised when the fetched watch page contains a reCAPTCHA form."""
+    CAUSE_MESSAGE = ('YouTube is receiving too many requests from this IP,'
+                     ' and now requires that a captcha must be solved in order to continue.')
+
+
 class TranscriptsDisabled(CouldNotRetrieveTranscript):
     CAUSE_MESSAGE = 'Subtitles are disabled for this video'
diff --git a/youtube_transcript_api/_transcripts.py b/youtube_transcript_api/_transcripts.py
index 6b767ff..9400a1d 100644
--- a/youtube_transcript_api/_transcripts.py
+++ b/youtube_transcript_api/_transcripts.py
@@ -14,6 +14,7 @@ import re
 from ._html_unescaping import unescape
 from ._errors import (
     VideoUnavailable,
+    TooManyRequests,
     NoTranscriptFound,
     TranscriptsDisabled,
     NotTranslatable,
@@ -38,6 +39,8 @@ class TranscriptListFetcher():
         splitted_html = html.split('"captions":')
 
         if len(splitted_html) <= 1:
+            if 'class="g-recaptcha"' in html:
+                raise TooManyRequests(video_id)
             if '"playabilityStatus":' not in html:
                 raise VideoUnavailable(video_id)
 
diff --git a/youtube_transcript_api/test/assets/youtube_too_many_requests.html.static b/youtube_transcript_api/test/assets/youtube_too_many_requests.html.static
new file mode 100644
index 0000000..c63003f
--- /dev/null
+++ 
b/youtube_transcript_api/test/assets/youtube_too_many_requests.html.static @@ -0,0 +1,239 @@ + + + + YouTube + + + + + + + + + +
+
+

+ Perdón por la interrupción. Hemos recibido un gran número de + solicitudes de tu red. +

+

+ Para seguir disfrutando de YouTube, rellena el siguiente formulario. +

+
+
+
+
+ +
+ ES + +
+
+ +
+
+
diff --git a/youtube_transcript_api/test/test_api.py b/youtube_transcript_api/test/test_api.py
index 5f95451..daf98f8 100644
--- a/youtube_transcript_api/test/test_api.py
+++ b/youtube_transcript_api/test/test_api.py
@@ -12,6 +12,7 @@ from youtube_transcript_api import (
     TranscriptsDisabled,
     NoTranscriptFound,
     VideoUnavailable,
+    TooManyRequests,
     NoTranscriptAvailable,
     NotTranslatable,
     TranslationLanguageNotAvailable,
@@ -134,6 +135,17 @@ class TestYouTubeTranscriptApi(TestCase):
         with self.assertRaises(VideoUnavailable):
             YouTubeTranscriptApi.get_transcript('abc')
 
+    def test_get_transcript__exception_if_youtube_request_limit_reached(self):
+        """A captcha page returned for the watch URL must raise TooManyRequests."""
+        httpretty.register_uri(
+            httpretty.GET,
+            'https://www.youtube.com/watch',
+            body=load_asset('youtube_too_many_requests.html.static')
+        )
+
+        with self.assertRaises(TooManyRequests):
+            YouTubeTranscriptApi.get_transcript('abc')
+
     def test_get_transcript__exception_if_transcripts_disabled(self):
         httpretty.register_uri(
             httpretty.GET,