error handling improved
This commit is contained in:
parent
55d76a158a
commit
91fe71c86c
|
@ -6,28 +6,75 @@ import logging
|
|||
|
||||
import requests
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class YouTubeTranscriptApi():
|
||||
class CouldNotRetrieveTranscript(Exception):
|
||||
"""
|
||||
Raised if a transcript could not be retrieved.
|
||||
"""
|
||||
|
||||
ERROR_MESSAGE = (
|
||||
'Could not get the transcript for the video {video_url}! '
|
||||
'Most likely subtitles have been disabled by the uploader or the video is no longer '
|
||||
'available.'
|
||||
)
|
||||
|
||||
def __init__(self, video_id):
|
||||
super(YouTubeTranscriptApi.CouldNotRetrieveTranscript, self).__init__(
|
||||
self.ERROR_MESSAGE.format(video_url=_TranscriptFetcher.WATCH_URL.format(video_id=video_id))
|
||||
)
|
||||
self.video_id = video_id
|
||||
|
||||
|
||||
@staticmethod
|
||||
def get(*video_ids):
|
||||
def get_transcripts(*video_ids, continue_after_error=False):
|
||||
"""
|
||||
Retrieves the transcripts for a list of videos.
|
||||
|
||||
:param video_ids: a list of youtube video ids
|
||||
:type video_ids: [str]
|
||||
:param continue_after_error: if this is set the execution won't be stopped, if an error occurs while retrieving
|
||||
one of the video transcripts
|
||||
:type continue_after_error: bool
|
||||
:return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of
|
||||
video ids, which could not be retrieved
|
||||
:rtype: ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}
|
||||
"""
|
||||
data = {}
|
||||
unretrievable_videos = []
|
||||
|
||||
for video_id in video_ids:
|
||||
try:
|
||||
data[video_id] = _TranscriptParser(_TranscriptFetcher(video_id).fetch()).parse()
|
||||
data[video_id] = YouTubeTranscriptApi.get_transcript(video_id)
|
||||
except Exception as exception:
|
||||
if not continue_after_error:
|
||||
raise exception
|
||||
|
||||
unretrievable_videos.append(video_id)
|
||||
|
||||
return data, unretrievable_videos
|
||||
|
||||
@staticmethod
|
||||
def get_transcript(video_id):
|
||||
"""
|
||||
Retrieves the transcript for a single video.
|
||||
|
||||
:param video_id: the youtube video id
|
||||
:type video_id: str
|
||||
:return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
|
||||
:rtype: [{'text': str, 'start': float, 'end': float}]
|
||||
"""
|
||||
try:
|
||||
return _TranscriptParser(_TranscriptFetcher(video_id).fetch()).parse()
|
||||
except Exception:
|
||||
logger.error(
|
||||
'Could not get the transcript for the video {video_url}! '
|
||||
'Most likely subtitles have been disabled by the uploader or the video is no longer '
|
||||
'available.'.format(
|
||||
YouTubeTranscriptApi.CouldNotRetrieveTranscript.ERROR_MESSAGE.format(
|
||||
video_url=_TranscriptFetcher.WATCH_URL.format(video_id=video_id)
|
||||
)
|
||||
)
|
||||
|
||||
return data
|
||||
raise YouTubeTranscriptApi.CouldNotRetrieveTranscript(video_id)
|
||||
|
||||
|
||||
class _TranscriptFetcher():
|
||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue