From 1bc50875754d69aac0de519a07749b6ccc54eec3 Mon Sep 17 00:00:00 2001
From: Jonas Depoix <jdepoix@seibert-media.net>
Date: Mon, 30 Dec 2019 15:20:47 +0100
Subject: [PATCH] added public list_transcripts method

---
 youtube_transcript_api/_api.py         | 69 ++++++++++++++++++++++----
 youtube_transcript_api/_transcripts.py | 16 +++---
 2 files changed, 68 insertions(+), 17 deletions(-)

diff --git a/youtube_transcript_api/_api.py b/youtube_transcript_api/_api.py
index 3476b9b..c1519ae 100644
--- a/youtube_transcript_api/_api.py
+++ b/youtube_transcript_api/_api.py
@@ -4,17 +4,68 @@ from ._transcripts import TranscriptListFetcher
 
 
 class YouTubeTranscriptApi():
+    @classmethod
+    def list_transcripts(cls, video_id, proxies=None):
+        """
+        Retrieves the list of transcripts which are available for a given video. It returns a `TranscriptList` object
+        which is iterable and provides methods to filter the list of transcripts for specific languages. While iterating
+        over the `TranscriptList` the individual transcripts are represented by `Transcript` objects, which provide
+        metadata and can either be fetched by calling `transcript.fetch()` or translated by calling
+        `transcript.translate('en')`. Example::
+
+            # retrieve the available transcripts
+            transcript_list = YouTubeTranscriptApi.list_transcripts('video_id')
+
+            # iterate over all available transcripts
+            for transcript in transcript_list:
+                # the Transcript object provides metadata properties
+                print(
+                    transcript.video_id,
+                    transcript.language,
+                    transcript.language_code,
+                    # whether it has been manually created or generated by YouTube
+                    transcript.is_generated,
+                    # a list of languages the transcript can be translated to
+                    transcript.translation_languages,
+                )
+
+                # fetch the actual transcript data
+                print(transcript.fetch())
+
+                # translating the transcript will return another transcript object
+                print(transcript.translate('en').fetch())
+
+            # you can also directly filter for the language you are looking for, using the transcript list
+            transcript = transcript_list.find_transcript(['de', 'en'])
+
+            # or just filter for manually created transcripts
+            transcript = transcript_list.find_manually_created_transcript(['de', 'en'])
+
+            # or automatically generated ones
+            transcript = transcript_list.find_generated_transcript(['de', 'en'])
+
+        :param video_id: the youtube video id
+        :type video_id: str
+        :param proxies: a dictionary mapping of http and https proxies to be used for the network requests
+        :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
+        :return: the list of available transcripts
+        :rtype TranscriptList:
+        """
+        with requests.Session() as http_client:
+            http_client.proxies = proxies if proxies else {}
+            return TranscriptListFetcher(http_client).fetch(video_id)
+
     @classmethod
     def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None):
         """
         Retrieves the transcripts for a list of videos.
 
         :param video_ids: a list of youtube video ids
-        :type video_ids: [str]
+        :type video_ids: list[str]
         :param languages: A list of language codes in a descending priority. For example, if this is set to ['de', 'en']
         it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails to
         do so.
-        :type languages: [str]
+        :type languages: list[str]
         :param continue_after_error: if this is set the execution won't be stopped, if an error occurs while retrieving
         one of the video transcripts
         :type continue_after_error: bool
@@ -22,7 +73,7 @@ class YouTubeTranscriptApi():
         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
         :return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of
         video ids, which could not be retrieved
-        :rtype: ({str: [{'text': str, 'start': float, 'end': float}]}, [str]})
+        :rtype ({str: [{'text': str, 'start': float, 'duration': float}]}, [str]):
         """
         data = {}
         unretrievable_videos = []
@@ -41,19 +92,19 @@ class YouTubeTranscriptApi():
     @classmethod
     def get_transcript(cls, video_id, languages=('en',), proxies=None):
         """
-        Retrieves the transcript for a single video.
+        Retrieves the transcript for a single video. This is just a shortcut for calling::
+
+            YouTubeTranscriptApi.list_transcripts(video_id, proxies).find_transcript(languages).fetch()
 
         :param video_id: the youtube video id
         :type video_id: str
         :param languages: A list of language codes in a descending priority. For example, if this is set to ['de', 'en']
         it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails to
         do so.
-        :type languages: [str]
+        :type languages: list[str]
         :param proxies: a dictionary mapping of http and https proxies to be used for the network requests
         :type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
         :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
-        :rtype: [{'text': str, 'start': float, 'end': float}]
+        :rtype [{'text': str, 'start': float, 'duration': float}]:
         """
-        with requests.Session() as http_client:
-            http_client.proxies = proxies if proxies else {}
-            return TranscriptListFetcher(http_client).fetch(video_id).find_transcript(languages).fetch()
+        return cls.list_transcripts(video_id, proxies).find_transcript(languages).fetch()
diff --git a/youtube_transcript_api/_transcripts.py b/youtube_transcript_api/_transcripts.py
index 19e9044..6b767ff 100644
--- a/youtube_transcript_api/_transcripts.py
+++ b/youtube_transcript_api/_transcripts.py
@@ -95,7 +95,7 @@ class TranscriptList():
         :param captions_json: the JSON parsed from the YouTube pages static HTML
         :type captions_json: dict
         :return: the created TranscriptList
-        :rtype TranscriptList
+        :rtype TranscriptList:
         """
         translation_languages = [
             {
@@ -142,9 +142,9 @@ class TranscriptList():
         :param language_codes: A list of language codes in a descending priority. For example, if this is set to
         ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
         it fails to do so.
-        :type languages: [str]
+        :type language_codes: list[str]
         :return: the found Transcript
-        :rtype: Transcript
+        :rtype Transcript:
         :raises: NoTranscriptFound
         """
         return self._find_transcript(language_codes, [self._manually_created_transcripts, self._generated_transcripts])
@@ -156,9 +156,9 @@ class TranscriptList():
         :param language_codes: A list of language codes in a descending priority. For example, if this is set to
         ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
         it fails to do so.
-        :type languages: [str]
+        :type language_codes: list[str]
         :return: the found Transcript
-        :rtype: Transcript
+        :rtype Transcript:
         :raises: NoTranscriptFound
         """
         return self._find_transcript(language_codes, [self._generated_transcripts,])
@@ -170,9 +170,9 @@ class TranscriptList():
         :param language_codes: A list of language codes in a descending priority. For example, if this is set to
         ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
         it fails to do so.
-        :type languages: [str]
+        :type language_codes: list[str]
         :return: the found Transcript
-        :rtype: Transcript
+        :rtype Transcript:
         :raises: NoTranscriptFound
         """
         return self._find_transcript(language_codes, [self._manually_created_transcripts,])
@@ -252,7 +252,7 @@ class Transcript():
         Loads the actual transcript data.
 
         :return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
-        :rtype: [{'text': str, 'start': float, 'end': float}]
+        :rtype [{'text': str, 'start': float, 'duration': float}]:
         """
         return _TranscriptParser().parse(
             self._http_client.get(self._url).text