diff --git a/youtube_transcript_api/__init__.py b/youtube_transcript_api/__init__.py
index 2d58d34..4cf4b9c 100644
--- a/youtube_transcript_api/__init__.py
+++ b/youtube_transcript_api/__init__.py
@@ -12,4 +12,5 @@ from ._errors import (
     CookiePathInvalid,
     CookiesInvalid,
     FailedToCreateConsentCookie,
+    YouTubeRequestFailed,
 )
diff --git a/youtube_transcript_api/_errors.py b/youtube_transcript_api/_errors.py
index cd645b5..cae17ad 100644
--- a/youtube_transcript_api/_errors.py
+++ b/youtube_transcript_api/_errors.py
@@ -35,6 +35,23 @@ class CouldNotRetrieveTranscript(Exception):
         return self.CAUSE_MESSAGE
 
 
+class YouTubeRequestFailed(CouldNotRetrieveTranscript):
+    """
+    Raised when a request to YouTube is answered with an HTTP error status.
+    """
+    CAUSE_MESSAGE = 'Request to YouTube failed: {reason}'
+
+    def __init__(self, video_id, http_error):
+        self.reason = str(http_error)
+        super(YouTubeRequestFailed, self).__init__(video_id)
+
+    @property
+    def cause(self):
+        return self.CAUSE_MESSAGE.format(
+            reason=self.reason,
+        )
+
+
 class VideoUnavailable(CouldNotRetrieveTranscript):
     CAUSE_MESSAGE = 'The video is no longer available'
 
diff --git a/youtube_transcript_api/_transcripts.py b/youtube_transcript_api/_transcripts.py
index b0d6f38..8240420 100644
--- a/youtube_transcript_api/_transcripts.py
+++ b/youtube_transcript_api/_transcripts.py
@@ -11,10 +11,13 @@ from xml.etree import ElementTree
 
 import re
 
+from requests import HTTPError
+
 from ._html_unescaping import unescape
 from ._errors import (
     VideoUnavailable,
     TooManyRequests,
+    YouTubeRequestFailed,
     NoTranscriptFound,
     TranscriptsDisabled,
     NotTranslatable,
@@ -25,6 +28,23 @@ from ._errors import (
 from ._settings import WATCH_URL
 
 
+def _raise_http_errors(response, video_id):
+    """
+    Returns the given response unchanged unless it carries an HTTP error status.
+
+    :param response: the response object returned by the http client
+    :param video_id: the id of the video the request was made for
+    :raises YouTubeRequestFailed: if response.raise_for_status() raises an HTTPError
+    :return: the given response
+    """
+    try:
+        response.raise_for_status()
+        return response
+    except HTTPError as error:
+        # YouTubeRequestFailed expects (video_id, http_error), in that order
+        raise YouTubeRequestFailed(video_id, error)
+
+
 class TranscriptListFetcher(object):
     def __init__(self, http_client):
         self._http_client = http_client
@@ -72,7 +92,8 @@ class TranscriptListFetcher(object):
         return html
 
     def _fetch_html(self, video_id):
-        return self._http_client.get(WATCH_URL.format(video_id=video_id)).text.replace(
+        response = self._http_client.get(WATCH_URL.format(video_id=video_id))
+        return _raise_http_errors(response, video_id).text.replace(
             '\\u0026', '&'
         ).replace(
             '\\', ''
@@ -273,8 +294,9 @@ class Transcript(object):
         :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
         :rtype [{'text': str, 'start': float, 'end': float}]:
         """
+        response = self._http_client.get(self._url)
         return _TranscriptParser().parse(
-            self._http_client.get(self._url).text
+            _raise_http_errors(response, self.video_id).text,
         )
 
     def __str__(self):
diff --git a/youtube_transcript_api/test/test_api.py b/youtube_transcript_api/test/test_api.py
index 240164d..0cc7fc3 100644
--- a/youtube_transcript_api/test/test_api.py
+++ b/youtube_transcript_api/test/test_api.py
@@ -19,6 +19,7 @@ from youtube_transcript_api import (
     CookiePathInvalid,
     CookiesInvalid,
     FailedToCreateConsentCookie,
+    YouTubeRequestFailed,
 )
 
 
@@ -174,6 +175,16 @@ class TestYouTubeTranscriptApi(TestCase):
         with self.assertRaises(VideoUnavailable):
             YouTubeTranscriptApi.get_transcript('abc')
 
+    def test_get_transcript__exception_if_youtube_request_fails(self):
+        httpretty.register_uri(
+            httpretty.GET,
+            'https://www.youtube.com/watch',
+            status=500
+        )
+
+        with self.assertRaises(YouTubeRequestFailed):
+            YouTubeTranscriptApi.get_transcript('abc')
+
     def test_get_transcript__exception_if_youtube_request_limit_reached(self):
         httpretty.register_uri(
             httpretty.GET,