error handling improved
This commit is contained in:
parent
55d76a158a
commit
91fe71c86c
|
@ -6,28 +6,75 @@ import logging
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class YouTubeTranscriptApi():
|
class YouTubeTranscriptApi():
|
||||||
|
class CouldNotRetrieveTranscript(Exception):
|
||||||
|
"""
|
||||||
|
Raised if a transcript could not be retrieved.
|
||||||
|
"""
|
||||||
|
|
||||||
|
ERROR_MESSAGE = (
|
||||||
|
'Could not get the transcript for the video {video_url}! '
|
||||||
|
'Most likely subtitles have been disabled by the uploader or the video is no longer '
|
||||||
|
'available.'
|
||||||
|
)
|
||||||
|
|
||||||
|
def __init__(self, video_id):
|
||||||
|
super(YouTubeTranscriptApi.CouldNotRetrieveTranscript, self).__init__(
|
||||||
|
self.ERROR_MESSAGE.format(video_url=_TranscriptFetcher.WATCH_URL.format(video_id=video_id))
|
||||||
|
)
|
||||||
|
self.video_id = video_id
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get(*video_ids):
|
def get_transcripts(*video_ids, continue_after_error=False):
|
||||||
|
"""
|
||||||
|
Retrieves the transcripts for a list of videos.
|
||||||
|
|
||||||
|
:param video_ids: a list of youtube video ids
|
||||||
|
:type video_ids: [str]
|
||||||
|
:param continue_after_error: if this is set, execution is not stopped when an error occurs while retrieving
|
||||||
|
one of the video transcripts
|
||||||
|
:type continue_after_error: bool
|
||||||
|
:return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of
|
||||||
|
video ids that could not be retrieved
|
||||||
|
:rtype: ({str: [{'text': str, 'start': float, 'duration': float}]}, [str])
|
||||||
|
"""
|
||||||
data = {}
|
data = {}
|
||||||
|
unretrievable_videos = []
|
||||||
|
|
||||||
for video_id in video_ids:
|
for video_id in video_ids:
|
||||||
try:
|
try:
|
||||||
data[video_id] = _TranscriptParser(_TranscriptFetcher(video_id).fetch()).parse()
|
data[video_id] = YouTubeTranscriptApi.get_transcript(video_id)
|
||||||
except Exception:
|
except Exception as exception:
|
||||||
logger.error(
|
if not continue_after_error:
|
||||||
'Could not get the transcript for the video {video_url}! '
|
raise exception
|
||||||
'Most likely subtitles have been disabled by the uploader or the video is no longer '
|
|
||||||
'available.'.format(
|
|
||||||
video_url=_TranscriptFetcher.WATCH_URL.format(video_id=video_id)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return data
|
unretrievable_videos.append(video_id)
|
||||||
|
|
||||||
|
return data, unretrievable_videos
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_transcript(video_id):
|
||||||
|
"""
|
||||||
|
Retrieves the transcript for a single video.
|
||||||
|
|
||||||
|
:param video_id: the youtube video id
|
||||||
|
:type video_id: str
|
||||||
|
:return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
|
||||||
|
:rtype: [{'text': str, 'start': float, 'duration': float}]
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
return _TranscriptParser(_TranscriptFetcher(video_id).fetch()).parse()
|
||||||
|
except Exception:
|
||||||
|
logger.error(
|
||||||
|
YouTubeTranscriptApi.CouldNotRetrieveTranscript.ERROR_MESSAGE.format(
|
||||||
|
video_url=_TranscriptFetcher.WATCH_URL.format(video_id=video_id)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
raise YouTubeTranscriptApi.CouldNotRetrieveTranscript(video_id)
|
||||||
|
|
||||||
|
|
||||||
class _TranscriptFetcher():
|
class _TranscriptFetcher():
|
||||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue