import requests

from ._transcripts import TranscriptDataFetcher


class YouTubeTranscriptApi():
    """
    Groups the classmethods used to retrieve transcripts of YouTube videos.

    All methods are classmethods, so no instance has to be created.
    """

    @classmethod
    def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None):
        """
        Retrieves the transcripts for a list of videos.

        Each video is fetched individually through `get_transcript`.

        :param video_ids: a list of youtube video ids
        :type video_ids: [str]
        :param languages: A list of language codes in a descending priority. For example, if this is set to
            ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english
            transcript (en) if it fails to do so. As a complete list of all working language codes cannot be
            provided with full certainty, you may have to play around with the language codes a bit, to find
            the one which is working for you!
        :type languages: [str]
        :param continue_after_error: if this is set the execution won't be stopped, if an error occurs while
            retrieving one of the video transcripts; the affected video id is collected in the second element
            of the returned tuple instead
        :type continue_after_error: bool
        :param proxies: a dictionary mapping of http and https proxies to be used for the network requests
        :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
        :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and
            a list of video ids, which could not be retrieved
        :rtype: ({str: [{'text': str, 'start': float, 'duration': float}]}, [str])
        :raises Exception: whatever `get_transcript` raised for the first failing video, if
            `continue_after_error` is not set
        """
        data = {}
        unretrievable_videos = []

        for video_id in video_ids:
            try:
                data[video_id] = cls.get_transcript(video_id, languages, proxies)
            except Exception:
                if not continue_after_error:
                    # A bare `raise` re-raises the active exception as-is, so the
                    # caller sees the traceback of the original failure.
                    raise
                # Best-effort mode: remember the failure and keep going.
                unretrievable_videos.append(video_id)

        return data, unretrievable_videos

    @classmethod
    def get_transcript(cls, video_id, languages=('en',), proxies=None):
        """
        Retrieves the transcript for a single video.

        :param video_id: the youtube video id
        :type video_id: str
        :param languages: A list of language codes in a descending priority. For example, if this is set to
            ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english
            transcript (en) if it fails to do so. As a complete list of all working language codes cannot be
            provided with full certainty, you may have to play around with the language codes a bit, to find
            the one which is working for you!
        :type languages: [str]
        :param proxies: a dictionary mapping of http and https proxies to be used for the network requests
        :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
        :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
        :rtype: [{'text': str, 'start': float, 'duration': float}]
        """
        # The session is only needed for this one retrieval; the `with` block
        # closes it again, even if fetching fails.
        with requests.Session() as http_client:
            # Falsy values (None, empty dict) mean "no proxies".
            http_client.proxies = proxies if proxies else {}
            return TranscriptDataFetcher(http_client).fetch(video_id).find_transcript(languages).fetch()