Merge pull request #204 from jdepoix/feature/invalid-video-id-error
Added an error that is raised if a URL is used as the video id
This commit is contained in:
commit
21a1976516
|
@ -13,4 +13,5 @@ from ._errors import (
|
||||||
CookiesInvalid,
|
CookiesInvalid,
|
||||||
FailedToCreateConsentCookie,
|
FailedToCreateConsentCookie,
|
||||||
YouTubeRequestFailed,
|
YouTubeRequestFailed,
|
||||||
|
InvalidVideoId,
|
||||||
)
|
)
|
||||||
|
|
|
@ -53,6 +53,14 @@ class VideoUnavailable(CouldNotRetrieveTranscript):
|
||||||
CAUSE_MESSAGE = 'The video is no longer available'
|
CAUSE_MESSAGE = 'The video is no longer available'
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidVideoId(CouldNotRetrieveTranscript):
|
||||||
|
CAUSE_MESSAGE = (
|
||||||
|
'You provided an invalid video id. Make sure you are using the video id and NOT the url!\n\n'
|
||||||
|
'Do NOT run: `YouTubeTranscriptApi.get_transcript("https://www.youtube.com/watch?v=1234")`\n'
|
||||||
|
'Instead run: `YouTubeTranscriptApi.get_transcript("1234")`'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TooManyRequests(CouldNotRetrieveTranscript):
|
class TooManyRequests(CouldNotRetrieveTranscript):
|
||||||
CAUSE_MESSAGE = (
|
CAUSE_MESSAGE = (
|
||||||
'YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. '
|
'YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. '
|
||||||
|
|
|
@ -24,6 +24,7 @@ from ._errors import (
|
||||||
TranslationLanguageNotAvailable,
|
TranslationLanguageNotAvailable,
|
||||||
NoTranscriptAvailable,
|
NoTranscriptAvailable,
|
||||||
FailedToCreateConsentCookie,
|
FailedToCreateConsentCookie,
|
||||||
|
InvalidVideoId,
|
||||||
)
|
)
|
||||||
from ._settings import WATCH_URL
|
from ._settings import WATCH_URL
|
||||||
|
|
||||||
|
@ -41,7 +42,6 @@ class TranscriptListFetcher(object):
|
||||||
self._http_client = http_client
|
self._http_client = http_client
|
||||||
|
|
||||||
def fetch(self, video_id):
|
def fetch(self, video_id):
|
||||||
|
|
||||||
return TranscriptList.build(
|
return TranscriptList.build(
|
||||||
self._http_client,
|
self._http_client,
|
||||||
video_id,
|
video_id,
|
||||||
|
@ -52,6 +52,8 @@ class TranscriptListFetcher(object):
|
||||||
splitted_html = html.split('"captions":')
|
splitted_html = html.split('"captions":')
|
||||||
|
|
||||||
if len(splitted_html) <= 1:
|
if len(splitted_html) <= 1:
|
||||||
|
if video_id.startswith('http://') or video_id.startswith('https://'):
|
||||||
|
raise InvalidVideoId(video_id)
|
||||||
if 'class="g-recaptcha"' in html:
|
if 'class="g-recaptcha"' in html:
|
||||||
raise TooManyRequests(video_id)
|
raise TooManyRequests(video_id)
|
||||||
if '"playabilityStatus":' not in html:
|
if '"playabilityStatus":' not in html:
|
||||||
|
@ -182,7 +184,7 @@ class TranscriptList(object):
|
||||||
|
|
||||||
def find_generated_transcript(self, language_codes):
|
def find_generated_transcript(self, language_codes):
|
||||||
"""
|
"""
|
||||||
Finds a automatically generated transcript for a given language code.
|
Finds an automatically generated transcript for a given language code.
|
||||||
|
|
||||||
:param language_codes: A list of language codes in a descending priority. For example, if this is set to
|
:param language_codes: A list of language codes in a descending priority. For example, if this is set to
|
||||||
['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
|
['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
|
||||||
|
|
|
@ -20,6 +20,7 @@ from youtube_transcript_api import (
|
||||||
CookiesInvalid,
|
CookiesInvalid,
|
||||||
FailedToCreateConsentCookie,
|
FailedToCreateConsentCookie,
|
||||||
YouTubeRequestFailed,
|
YouTubeRequestFailed,
|
||||||
|
InvalidVideoId,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -97,6 +98,16 @@ class TestYouTubeTranscriptApi(TestCase):
|
||||||
|
|
||||||
self.assertTrue(transcript.is_generated)
|
self.assertTrue(transcript.is_generated)
|
||||||
|
|
||||||
|
def test_list_transcripts__url_as_video_id(self):
|
||||||
|
httpretty.register_uri(
|
||||||
|
httpretty.GET,
|
||||||
|
'https://www.youtube.com/watch',
|
||||||
|
body=load_asset('youtube_transcripts_disabled.html.static')
|
||||||
|
)
|
||||||
|
|
||||||
|
with self.assertRaises(InvalidVideoId):
|
||||||
|
YouTubeTranscriptApi.list_transcripts('https://www.youtube.com/watch?v=GJLlxj_dtq8')
|
||||||
|
|
||||||
def test_translate_transcript(self):
|
def test_translate_transcript(self):
|
||||||
transcript = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8').find_transcript(['en'])
|
transcript = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8').find_transcript(['en'])
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue