diff --git a/poetry.lock b/poetry.lock
index f87bcde..b3fc681 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,5 +1,51 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+[[package]]
+name = "black"
+version = "24.8.0"
+description = "The uncompromising code formatter."
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "black-24.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:09cdeb74d494ec023ded657f7092ba518e8cf78fa8386155e4a03fdcc44679e6"},
+ {file = "black-24.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:81c6742da39f33b08e791da38410f32e27d632260e599df7245cccee2064afeb"},
+ {file = "black-24.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:707a1ca89221bc8a1a64fb5e15ef39cd755633daa672a9db7498d1c19de66a42"},
+ {file = "black-24.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:d6417535d99c37cee4091a2f24eb2b6d5ec42b144d50f1f2e436d9fe1916fe1a"},
+ {file = "black-24.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fb6e2c0b86bbd43dee042e48059c9ad7830abd5c94b0bc518c0eeec57c3eddc1"},
+ {file = "black-24.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:837fd281f1908d0076844bc2b801ad2d369c78c45cf800cad7b61686051041af"},
+ {file = "black-24.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:62e8730977f0b77998029da7971fa896ceefa2c4c4933fcd593fa599ecbf97a4"},
+ {file = "black-24.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:72901b4913cbac8972ad911dc4098d5753704d1f3c56e44ae8dce99eecb0e3af"},
+ {file = "black-24.8.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:7c046c1d1eeb7aea9335da62472481d3bbf3fd986e093cffd35f4385c94ae368"},
+ {file = "black-24.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:649f6d84ccbae73ab767e206772cc2d7a393a001070a4c814a546afd0d423aed"},
+ {file = "black-24.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2b59b250fdba5f9a9cd9d0ece6e6d993d91ce877d121d161e4698af3eb9c1018"},
+ {file = "black-24.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:6e55d30d44bed36593c3163b9bc63bf58b3b30e4611e4d88a0c3c239930ed5b2"},
+ {file = "black-24.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:505289f17ceda596658ae81b61ebbe2d9b25aa78067035184ed0a9d855d18afd"},
+ {file = "black-24.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b19c9ad992c7883ad84c9b22aaa73562a16b819c1d8db7a1a1a49fb7ec13c7d2"},
+ {file = "black-24.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1f13f7f386f86f8121d76599114bb8c17b69d962137fc70efe56137727c7047e"},
+ {file = "black-24.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:f490dbd59680d809ca31efdae20e634f3fae27fba3ce0ba3208333b713bc3920"},
+ {file = "black-24.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eab4dd44ce80dea27dc69db40dab62d4ca96112f87996bca68cd75639aeb2e4c"},
+ {file = "black-24.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3c4285573d4897a7610054af5a890bde7c65cb466040c5f0c8b732812d7f0e5e"},
+ {file = "black-24.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e84e33b37be070ba135176c123ae52a51f82306def9f7d063ee302ecab2cf47"},
+ {file = "black-24.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:73bbf84ed136e45d451a260c6b73ed674652f90a2b3211d6a35e78054563a9bb"},
+ {file = "black-24.8.0-py3-none-any.whl", hash = "sha256:972085c618ee94f402da1af548a4f218c754ea7e5dc70acb168bfaca4c2542ed"},
+ {file = "black-24.8.0.tar.gz", hash = "sha256:2500945420b6784c38b9ee885af039f5e7471ef284ab03fa35ecdde4688cd83f"},
+]
+
+[package.dependencies]
+click = ">=8.0.0"
+mypy-extensions = ">=0.4.3"
+packaging = ">=22.0"
+pathspec = ">=0.9.0"
+platformdirs = ">=2"
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""}
+
+[package.extras]
+colorama = ["colorama (>=0.4.3)"]
+d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"]
+jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
+uvloop = ["uvloop (>=0.15.2)"]
+
[[package]]
name = "certifi"
version = "2024.8.30"
@@ -110,6 +156,20 @@ files = [
{file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"},
]
+[[package]]
+name = "click"
+version = "8.1.7"
+description = "Composable command line interface toolkit"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"},
+ {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
[[package]]
name = "colorama"
version = "0.4.6"
@@ -302,6 +362,17 @@ build = ["blurb", "twine", "wheel"]
docs = ["sphinx"]
test = ["pytest", "pytest-cov"]
+[[package]]
+name = "mypy-extensions"
+version = "1.0.0"
+description = "Type system extensions for programs checked with the mypy type checker."
+optional = false
+python-versions = ">=3.5"
+files = [
+ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
+ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
+]
+
[[package]]
name = "packaging"
version = "24.1"
@@ -313,6 +384,33 @@ files = [
{file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"},
]
+[[package]]
+name = "pathspec"
+version = "0.12.1"
+description = "Utility library for gitignore style pattern matching of file paths."
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"},
+ {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
+]
+
+[[package]]
+name = "platformdirs"
+version = "4.3.6"
+description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"},
+ {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"},
+]
+
+[package.extras]
+docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"]
+test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"]
+type = ["mypy (>=1.11.2)"]
+
[[package]]
name = "pluggy"
version = "1.5.0"
@@ -382,6 +480,17 @@ files = [
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
]
+[[package]]
+name = "typing-extensions"
+version = "4.12.2"
+description = "Backported and Experimental Type Hints for Python 3.8+"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
+ {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
+]
+
[[package]]
name = "urllib3"
version = "2.2.3"
@@ -402,4 +511,4 @@ zstd = ["zstandard (>=0.18.0)"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8,<3.13"
-content-hash = "ae3ea36431a2a24e1d07e7c6e251fe7490b86edd928c22eda084e3cb974aaa99"
+content-hash = "4c2e7d294773ea148b69f961053a9469630c48b88248903ead43e41a2838ff94"
diff --git a/pyproject.toml b/pyproject.toml
index 5176f40..5720af7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,6 +40,7 @@ youtube_transcript_api = "youtube_transcript_api.__main__:main"
[tool.poe.tasks]
test = "pytest youtube_transcript_api"
coverage.shell = "pytest youtube_transcript_api && coverage report -m"
+format = "black youtube_transcript_api"
[tool.poetry.dependencies]
python = ">=3.8,<3.13"
@@ -51,6 +52,7 @@ coverage = "^7.6.1"
mock = "^5.1.0"
httpretty = "^1.1.4"
coveralls = "^4.0.1"
+black = "^24.8.0"
[tool.coverage.run]
source = ["youtube_transcript_api"]
diff --git a/youtube_transcript_api/__main__.py b/youtube_transcript_api/__main__.py
index f756560..5b96393 100644
--- a/youtube_transcript_api/__main__.py
+++ b/youtube_transcript_api/__main__.py
@@ -11,5 +11,5 @@ def main():
print(YouTubeTranscriptCli(sys.argv[1:]).run())
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff --git a/youtube_transcript_api/_api.py b/youtube_transcript_api/_api.py
index 24a1236..bf1f240 100644
--- a/youtube_transcript_api/_api.py
+++ b/youtube_transcript_api/_api.py
@@ -1,17 +1,17 @@
import requests
-try: # pragma: no cover
+
+try: # pragma: no cover
import http.cookiejar as cookiejar
+
CookieLoadError = (FileNotFoundError, cookiejar.LoadError)
-except ImportError: # pragma: no cover
+except ImportError: # pragma: no cover
import cookielib as cookiejar
+
CookieLoadError = IOError
from ._transcripts import TranscriptListFetcher
-from ._errors import (
- CookiePathInvalid,
- CookiesInvalid
-)
+from ._errors import CookiePathInvalid, CookiesInvalid
class YouTubeTranscriptApi(object):
@@ -71,8 +71,15 @@ class YouTubeTranscriptApi(object):
return TranscriptListFetcher(http_client).fetch(video_id)
@classmethod
- def get_transcripts(cls, video_ids, languages=('en',), continue_after_error=False, proxies=None,
- cookies=None, preserve_formatting=False):
+ def get_transcripts(
+ cls,
+ video_ids,
+ languages=("en",),
+ continue_after_error=False,
+ proxies=None,
+ cookies=None,
+ preserve_formatting=False,
+ ):
"""
Retrieves the transcripts for a list of videos.
@@ -102,7 +109,9 @@ class YouTubeTranscriptApi(object):
for video_id in video_ids:
try:
- data[video_id] = cls.get_transcript(video_id, languages, proxies, cookies, preserve_formatting)
+ data[video_id] = cls.get_transcript(
+ video_id, languages, proxies, cookies, preserve_formatting
+ )
except Exception as exception:
if not continue_after_error:
raise exception
@@ -112,7 +121,14 @@ class YouTubeTranscriptApi(object):
return data, unretrievable_videos
@classmethod
- def get_transcript(cls, video_id, languages=('en',), proxies=None, cookies=None, preserve_formatting=False):
+ def get_transcript(
+ cls,
+ video_id,
+ languages=("en",),
+ proxies=None,
+ cookies=None,
+ preserve_formatting=False,
+ ):
"""
Retrieves the transcript for a single video. This is just a shortcut for calling::
@@ -134,7 +150,11 @@ class YouTubeTranscriptApi(object):
:rtype [{'text': str, 'start': float, 'end': float}]:
"""
assert isinstance(video_id, str), "`video_id` must be a string"
- return cls.list_transcripts(video_id, proxies, cookies).find_transcript(languages).fetch(preserve_formatting=preserve_formatting)
+ return (
+ cls.list_transcripts(video_id, proxies, cookies)
+ .find_transcript(languages)
+ .fetch(preserve_formatting=preserve_formatting)
+ )
@classmethod
def _load_cookies(cls, cookies, video_id):
diff --git a/youtube_transcript_api/_cli.py b/youtube_transcript_api/_cli.py
index a9cbf75..09f76ba 100644
--- a/youtube_transcript_api/_cli.py
+++ b/youtube_transcript_api/_cli.py
@@ -13,10 +13,10 @@ class YouTubeTranscriptCli(object):
parsed_args = self._parse_args()
if parsed_args.exclude_manually_created and parsed_args.exclude_generated:
- return ''
+ return ""
proxies = None
- if parsed_args.http_proxy != '' or parsed_args.https_proxy != '':
+ if parsed_args.http_proxy != "" or parsed_args.https_proxy != "":
proxies = {"http": parsed_args.http_proxy, "https": parsed_args.https_proxy}
cookies = parsed_args.cookies
@@ -26,25 +26,41 @@ class YouTubeTranscriptCli(object):
for video_id in parsed_args.video_ids:
try:
- transcripts.append(self._fetch_transcript(parsed_args, proxies, cookies, video_id))
+ transcripts.append(
+ self._fetch_transcript(parsed_args, proxies, cookies, video_id)
+ )
except Exception as exception:
exceptions.append(exception)
- return '\n\n'.join(
+ return "\n\n".join(
[str(exception) for exception in exceptions]
- + ([FormatterLoader().load(parsed_args.format).format_transcripts(transcripts)] if transcripts else [])
+ + (
+ [
+ FormatterLoader()
+ .load(parsed_args.format)
+ .format_transcripts(transcripts)
+ ]
+ if transcripts
+ else []
+ )
)
def _fetch_transcript(self, parsed_args, proxies, cookies, video_id):
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies, cookies=cookies)
+ transcript_list = YouTubeTranscriptApi.list_transcripts(
+ video_id, proxies=proxies, cookies=cookies
+ )
if parsed_args.list_transcripts:
return str(transcript_list)
if parsed_args.exclude_manually_created:
- transcript = transcript_list.find_generated_transcript(parsed_args.languages)
+ transcript = transcript_list.find_generated_transcript(
+ parsed_args.languages
+ )
elif parsed_args.exclude_generated:
- transcript = transcript_list.find_manually_created_transcript(parsed_args.languages)
+ transcript = transcript_list.find_manually_created_transcript(
+ parsed_args.languages
+ )
else:
transcript = transcript_list.find_transcript(parsed_args.languages)
@@ -56,80 +72,84 @@ class YouTubeTranscriptCli(object):
def _parse_args(self):
parser = argparse.ArgumentParser(
description=(
- 'This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. '
- 'It also works for automatically generated subtitles and it does not require a headless browser, like '
- 'other selenium based solutions do!'
+ "This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. "
+ "It also works for automatically generated subtitles and it does not require a headless browser, like "
+ "other selenium based solutions do!"
)
)
parser.add_argument(
- '--list-transcripts',
- action='store_const',
+ "--list-transcripts",
+ action="store_const",
const=True,
default=False,
- help='This will list the languages in which the given videos are available in.',
+ help="This will list the languages in which the given videos are available in.",
)
- parser.add_argument('video_ids', nargs='+', type=str, help='List of YouTube video IDs.')
parser.add_argument(
- '--languages',
- nargs='*',
- default=['en',],
+ "video_ids", nargs="+", type=str, help="List of YouTube video IDs."
+ )
+ parser.add_argument(
+ "--languages",
+ nargs="*",
+ default=[
+ "en",
+ ],
type=str,
help=(
'A list of language codes in a descending priority. For example, if this is set to "de en" it will '
- 'first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails '
- 'to do so. As I can\'t provide a complete list of all working language codes with full certainty, you '
- 'may have to play around with the language codes a bit, to find the one which is working for you!'
+ "first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails "
+ "to do so. As I can't provide a complete list of all working language codes with full certainty, you "
+ "may have to play around with the language codes a bit, to find the one which is working for you!"
),
)
parser.add_argument(
- '--exclude-generated',
- action='store_const',
+ "--exclude-generated",
+ action="store_const",
const=True,
default=False,
- help='If this flag is set transcripts which have been generated by YouTube will not be retrieved.',
+ help="If this flag is set transcripts which have been generated by YouTube will not be retrieved.",
)
parser.add_argument(
- '--exclude-manually-created',
- action='store_const',
+ "--exclude-manually-created",
+ action="store_const",
const=True,
default=False,
- help='If this flag is set transcripts which have been manually created will not be retrieved.',
+ help="If this flag is set transcripts which have been manually created will not be retrieved.",
)
parser.add_argument(
- '--format',
+ "--format",
type=str,
- default='pretty',
+ default="pretty",
choices=tuple(FormatterLoader.TYPES.keys()),
)
parser.add_argument(
- '--translate',
- default='',
+ "--translate",
+ default="",
help=(
- 'The language code for the language you want this transcript to be translated to. Use the '
- '--list-transcripts feature to find out which languages are translatable and which translation '
- 'languages are available.'
- )
+ "The language code for the language you want this transcript to be translated to. Use the "
+ "--list-transcripts feature to find out which languages are translatable and which translation "
+ "languages are available."
+ ),
)
parser.add_argument(
- '--http-proxy',
- default='',
- metavar='URL',
- help='Use the specified HTTP proxy.'
+ "--http-proxy",
+ default="",
+ metavar="URL",
+ help="Use the specified HTTP proxy.",
)
parser.add_argument(
- '--https-proxy',
- default='',
- metavar='URL',
- help='Use the specified HTTPS proxy.'
+ "--https-proxy",
+ default="",
+ metavar="URL",
+ help="Use the specified HTTPS proxy.",
)
parser.add_argument(
- '--cookies',
+ "--cookies",
default=None,
- help='The cookie file that will be used for authorization with youtube.'
+ help="The cookie file that will be used for authorization with youtube.",
)
-
+
return self._sanitize_video_ids(parser.parse_args(self._args))
def _sanitize_video_ids(self, args):
- args.video_ids = [video_id.replace('\\', '') for video_id in args.video_ids]
+ args.video_ids = [video_id.replace("\\", "") for video_id in args.video_ids]
return args
diff --git a/youtube_transcript_api/_errors.py b/youtube_transcript_api/_errors.py
index d652c59..df4b0ad 100644
--- a/youtube_transcript_api/_errors.py
+++ b/youtube_transcript_api/_errors.py
@@ -5,16 +5,17 @@ class CouldNotRetrieveTranscript(Exception):
"""
Raised if a transcript could not be retrieved.
"""
- ERROR_MESSAGE = '\nCould not retrieve a transcript for the video {video_url}!'
- CAUSE_MESSAGE_INTRO = ' This is most likely caused by:\n\n{cause}'
- CAUSE_MESSAGE = ''
+
+ ERROR_MESSAGE = "\nCould not retrieve a transcript for the video {video_url}!"
+ CAUSE_MESSAGE_INTRO = " This is most likely caused by:\n\n{cause}"
+ CAUSE_MESSAGE = ""
GITHUB_REFERRAL = (
- '\n\nIf you are sure that the described cause is not responsible for this error '
- 'and that a transcript should be retrievable, please create an issue at '
- 'https://github.com/jdepoix/youtube-transcript-api/issues. '
- 'Please add which version of youtube_transcript_api you are using '
- 'and provide the information needed to replicate the error. '
- 'Also make sure that there are no open issues which already describe your problem!'
+ "\n\nIf you are sure that the described cause is not responsible for this error "
+ "and that a transcript should be retrievable, please create an issue at "
+ "https://github.com/jdepoix/youtube-transcript-api/issues. "
+ "Please add which version of youtube_transcript_api you are using "
+ "and provide the information needed to replicate the error. "
+ "Also make sure that there are no open issues which already describe your problem!"
)
def __init__(self, video_id):
@@ -23,10 +24,14 @@ class CouldNotRetrieveTranscript(Exception):
def _build_error_message(self):
cause = self.cause
- error_message = self.ERROR_MESSAGE.format(video_url=WATCH_URL.format(video_id=self.video_id))
+ error_message = self.ERROR_MESSAGE.format(
+ video_url=WATCH_URL.format(video_id=self.video_id)
+ )
if cause:
- error_message += self.CAUSE_MESSAGE_INTRO.format(cause=cause) + self.GITHUB_REFERRAL
+ error_message += (
+ self.CAUSE_MESSAGE_INTRO.format(cause=cause) + self.GITHUB_REFERRAL
+ )
return error_message
@@ -36,7 +41,7 @@ class CouldNotRetrieveTranscript(Exception):
class YouTubeRequestFailed(CouldNotRetrieveTranscript):
- CAUSE_MESSAGE = 'Request to YouTube failed: {reason}'
+ CAUSE_MESSAGE = "Request to YouTube failed: {reason}"
def __init__(self, video_id, http_error):
self.reason = str(http_error)
@@ -50,12 +55,12 @@ class YouTubeRequestFailed(CouldNotRetrieveTranscript):
class VideoUnavailable(CouldNotRetrieveTranscript):
- CAUSE_MESSAGE = 'The video is no longer available'
+ CAUSE_MESSAGE = "The video is no longer available"
class InvalidVideoId(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = (
- 'You provided an invalid video id. Make sure you are using the video id and NOT the url!\n\n'
+ "You provided an invalid video id. Make sure you are using the video id and NOT the url!\n\n"
'Do NOT run: `YouTubeTranscriptApi.get_transcript("https://www.youtube.com/watch?v=1234")`\n'
'Instead run: `YouTubeTranscriptApi.get_transcript("1234")`'
)
@@ -63,48 +68,48 @@ class InvalidVideoId(CouldNotRetrieveTranscript):
class TooManyRequests(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = (
- 'YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. '
- 'One of the following things can be done to work around this:\n\
- - Manually solve the captcha in a browser and export the cookie. '
- 'Read here how to use that cookie with '
- 'youtube-transcript-api: https://github.com/jdepoix/youtube-transcript-api#cookies\n\
+ "YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. "
+ "One of the following things can be done to work around this:\n\
+ - Manually solve the captcha in a browser and export the cookie. "
+ "Read here how to use that cookie with "
+ "youtube-transcript-api: https://github.com/jdepoix/youtube-transcript-api#cookies\n\
- Use a different IP address\n\
- - Wait until the ban on your IP has been lifted'
+ - Wait until the ban on your IP has been lifted"
)
class TranscriptsDisabled(CouldNotRetrieveTranscript):
- CAUSE_MESSAGE = 'Subtitles are disabled for this video'
+ CAUSE_MESSAGE = "Subtitles are disabled for this video"
class NoTranscriptAvailable(CouldNotRetrieveTranscript):
- CAUSE_MESSAGE = 'No transcripts are available for this video'
+ CAUSE_MESSAGE = "No transcripts are available for this video"
class NotTranslatable(CouldNotRetrieveTranscript):
- CAUSE_MESSAGE = 'The requested language is not translatable'
+ CAUSE_MESSAGE = "The requested language is not translatable"
class TranslationLanguageNotAvailable(CouldNotRetrieveTranscript):
- CAUSE_MESSAGE = 'The requested translation language is not available'
+ CAUSE_MESSAGE = "The requested translation language is not available"
class CookiePathInvalid(CouldNotRetrieveTranscript):
- CAUSE_MESSAGE = 'The provided cookie file was unable to be loaded'
+ CAUSE_MESSAGE = "The provided cookie file was unable to be loaded"
class CookiesInvalid(CouldNotRetrieveTranscript):
- CAUSE_MESSAGE = 'The cookies provided are not valid (may have expired)'
+ CAUSE_MESSAGE = "The cookies provided are not valid (may have expired)"
class FailedToCreateConsentCookie(CouldNotRetrieveTranscript):
- CAUSE_MESSAGE = 'Failed to automatically give consent to saving cookies'
+ CAUSE_MESSAGE = "Failed to automatically give consent to saving cookies"
class NoTranscriptFound(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = (
- 'No transcripts were found for any of the requested language codes: {requested_language_codes}\n\n'
- '{transcript_data}'
+ "No transcripts were found for any of the requested language codes: {requested_language_codes}\n\n"
+ "{transcript_data}"
)
def __init__(self, video_id, requested_language_codes, transcript_data):
diff --git a/youtube_transcript_api/_html_unescaping.py b/youtube_transcript_api/_html_unescaping.py
index 3efdf4b..6654d70 100644
--- a/youtube_transcript_api/_html_unescaping.py
+++ b/youtube_transcript_api/_html_unescaping.py
@@ -2,10 +2,10 @@ import sys
# This can only be tested by using different python versions, therefore it is not covered by coverage.py
-if sys.version_info.major == 3 and sys.version_info.minor >= 4: # pragma: no cover
+if sys.version_info.major == 3 and sys.version_info.minor >= 4: # pragma: no cover
# Python 3.4+
from html import unescape
-else: # pragma: no cover
+else: # pragma: no cover
if sys.version_info.major <= 2:
# Python 2
import HTMLParser
diff --git a/youtube_transcript_api/_settings.py b/youtube_transcript_api/_settings.py
index b1f7dfe..585b863 100644
--- a/youtube_transcript_api/_settings.py
+++ b/youtube_transcript_api/_settings.py
@@ -1 +1 @@
-WATCH_URL = 'https://www.youtube.com/watch?v={video_id}'
+WATCH_URL = "https://www.youtube.com/watch?v={video_id}"
diff --git a/youtube_transcript_api/_transcripts.py b/youtube_transcript_api/_transcripts.py
index ef1f44b..7ce4d2e 100644
--- a/youtube_transcript_api/_transcripts.py
+++ b/youtube_transcript_api/_transcripts.py
@@ -3,7 +3,7 @@ import sys
# This can only be tested by using different python versions, therefore it is not covered by coverage.py
if sys.version_info.major == 2: # pragma: no cover
reload(sys)
- sys.setdefaultencoding('utf-8')
+ sys.setdefaultencoding("utf-8")
import json
@@ -52,7 +52,7 @@ class TranscriptListFetcher(object):
splitted_html = html.split('"captions":')
if len(splitted_html) <= 1:
- if video_id.startswith('http://') or video_id.startswith('https://'):
+ if video_id.startswith("http://") or video_id.startswith("https://"):
raise InvalidVideoId(video_id)
if 'class="g-recaptcha"' in html:
raise TooManyRequests(video_id)
@@ -62,12 +62,12 @@ class TranscriptListFetcher(object):
raise TranscriptsDisabled(video_id)
captions_json = json.loads(
- splitted_html[1].split(',"videoDetails')[0].replace('\n', '')
- ).get('playerCaptionsTracklistRenderer')
+ splitted_html[1].split(',"videoDetails')[0].replace("\n", "")
+ ).get("playerCaptionsTracklistRenderer")
if captions_json is None:
raise TranscriptsDisabled(video_id)
- if 'captionTracks' not in captions_json:
+ if "captionTracks" not in captions_json:
raise NoTranscriptAvailable(video_id)
return captions_json
@@ -76,7 +76,9 @@ class TranscriptListFetcher(object):
match = re.search('name="v" value="(.*?)"', html)
if match is None:
raise FailedToCreateConsentCookie(video_id)
- self._http_client.cookies.set('CONSENT', 'YES+' + match.group(1), domain='.youtube.com')
+ self._http_client.cookies.set(
+ "CONSENT", "YES+" + match.group(1), domain=".youtube.com"
+ )
def _fetch_video_html(self, video_id):
html = self._fetch_html(video_id)
@@ -88,7 +90,9 @@ class TranscriptListFetcher(object):
return html
def _fetch_html(self, video_id):
- response = self._http_client.get(WATCH_URL.format(video_id=video_id), headers={'Accept-Language': 'en-US'})
+ response = self._http_client.get(
+ WATCH_URL.format(video_id=video_id), headers={"Accept-Language": "en-US"}
+ )
return unescape(_raise_http_errors(response, video_id).text)
@@ -98,7 +102,13 @@ class TranscriptList(object):
for a given YouTube video. Also it provides functionality to search for a transcript in a given language.
"""
- def __init__(self, video_id, manually_created_transcripts, generated_transcripts, translation_languages):
+ def __init__(
+ self,
+ video_id,
+ manually_created_transcripts,
+ generated_transcripts,
+ translation_languages,
+ ):
"""
The constructor is only for internal use. Use the static build method instead.
@@ -132,28 +142,29 @@ class TranscriptList(object):
"""
translation_languages = [
{
- 'language': translation_language['languageName']['simpleText'],
- 'language_code': translation_language['languageCode'],
- } for translation_language in captions_json.get('translationLanguages', [])
+ "language": translation_language["languageName"]["simpleText"],
+ "language_code": translation_language["languageCode"],
+ }
+ for translation_language in captions_json.get("translationLanguages", [])
]
manually_created_transcripts = {}
generated_transcripts = {}
- for caption in captions_json['captionTracks']:
- if caption.get('kind', '') == 'asr':
+ for caption in captions_json["captionTracks"]:
+ if caption.get("kind", "") == "asr":
transcript_dict = generated_transcripts
else:
transcript_dict = manually_created_transcripts
- transcript_dict[caption['languageCode']] = Transcript(
+ transcript_dict[caption["languageCode"]] = Transcript(
http_client,
video_id,
- caption['baseUrl'],
- caption['name']['simpleText'],
- caption['languageCode'],
- caption.get('kind', '') == 'asr',
- translation_languages if caption.get('isTranslatable', False) else [],
+ caption["baseUrl"],
+ caption["name"]["simpleText"],
+ caption["languageCode"],
+ caption.get("kind", "") == "asr",
+ translation_languages if caption.get("isTranslatable", False) else [],
)
return TranscriptList(
@@ -164,7 +175,10 @@ class TranscriptList(object):
)
def __iter__(self):
- return iter(list(self._manually_created_transcripts.values()) + list(self._generated_transcripts.values()))
+ return iter(
+ list(self._manually_created_transcripts.values())
+ + list(self._generated_transcripts.values())
+ )
def find_transcript(self, language_codes):
"""
@@ -180,7 +194,10 @@ class TranscriptList(object):
:rtype Transcript:
:raises: NoTranscriptFound
"""
- return self._find_transcript(language_codes, [self._manually_created_transcripts, self._generated_transcripts])
+ return self._find_transcript(
+ language_codes,
+ [self._manually_created_transcripts, self._generated_transcripts],
+ )
def find_generated_transcript(self, language_codes):
"""
@@ -208,7 +225,9 @@ class TranscriptList(object):
:rtype Transcript:
:raises: NoTranscriptFound
"""
- return self._find_transcript(language_codes, [self._manually_created_transcripts])
+ return self._find_transcript(
+ language_codes, [self._manually_created_transcripts]
+ )
def _find_transcript(self, language_codes, transcript_dicts):
for language_code in language_codes:
@@ -216,44 +235,54 @@ class TranscriptList(object):
if language_code in transcript_dict:
return transcript_dict[language_code]
- raise NoTranscriptFound(
- self.video_id,
- language_codes,
- self
- )
+ raise NoTranscriptFound(self.video_id, language_codes, self)
def __str__(self):
return (
- 'For this video ({video_id}) transcripts are available in the following languages:\n\n'
- '(MANUALLY CREATED)\n'
- '{available_manually_created_transcript_languages}\n\n'
- '(GENERATED)\n'
- '{available_generated_transcripts}\n\n'
- '(TRANSLATION LANGUAGES)\n'
- '{available_translation_languages}'
+ "For this video ({video_id}) transcripts are available in the following languages:\n\n"
+ "(MANUALLY CREATED)\n"
+ "{available_manually_created_transcript_languages}\n\n"
+ "(GENERATED)\n"
+ "{available_generated_transcripts}\n\n"
+ "(TRANSLATION LANGUAGES)\n"
+ "{available_translation_languages}"
).format(
video_id=self.video_id,
available_manually_created_transcript_languages=self._get_language_description(
- str(transcript) for transcript in self._manually_created_transcripts.values()
+ str(transcript)
+ for transcript in self._manually_created_transcripts.values()
),
available_generated_transcripts=self._get_language_description(
str(transcript) for transcript in self._generated_transcripts.values()
),
available_translation_languages=self._get_language_description(
'{language_code} ("{language}")'.format(
- language=translation_language['language'],
- language_code=translation_language['language_code'],
- ) for translation_language in self._translation_languages
- )
+ language=translation_language["language"],
+ language_code=translation_language["language_code"],
+ )
+ for translation_language in self._translation_languages
+ ),
)
def _get_language_description(self, transcript_strings):
- description = '\n'.join(' - {transcript}'.format(transcript=transcript) for transcript in transcript_strings)
- return description if description else 'None'
+ description = "\n".join(
+ " - {transcript}".format(transcript=transcript)
+ for transcript in transcript_strings
+ )
+ return description if description else "None"
class Transcript(object):
- def __init__(self, http_client, video_id, url, language, language_code, is_generated, translation_languages):
+ def __init__(
+ self,
+ http_client,
+ video_id,
+ url,
+ language,
+ language_code,
+ is_generated,
+ translation_languages,
+ ):
"""
You probably don't want to initialize this directly. Usually you'll access Transcript objects using a
TranscriptList.
@@ -276,7 +305,7 @@ class Transcript(object):
self.is_generated = is_generated
self.translation_languages = translation_languages
self._translation_languages_dict = {
- translation_language['language_code']: translation_language['language']
+ translation_language["language_code"]: translation_language["language"]
for translation_language in translation_languages
}
@@ -288,7 +317,9 @@ class Transcript(object):
:return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
:rtype [{'text': str, 'start': float, 'end': float}]:
"""
- response = self._http_client.get(self._url, headers={'Accept-Language': 'en-US'})
+ response = self._http_client.get(
+ self._url, headers={"Accept-Language": "en-US"}
+ )
return _TranscriptParser(preserve_formatting=preserve_formatting).parse(
_raise_http_errors(response, self.video_id).text,
)
@@ -297,7 +328,7 @@ class Transcript(object):
return '{language_code} ("{language}"){translation_description}'.format(
language=self.language,
language_code=self.language_code,
- translation_description='[TRANSLATABLE]' if self.is_translatable else ''
+ translation_description="[TRANSLATABLE]" if self.is_translatable else "",
)
@property
@@ -314,7 +345,9 @@ class Transcript(object):
return Transcript(
self._http_client,
self.video_id,
- '{url}&tlang={language_code}'.format(url=self._url, language_code=language_code),
+ "{url}&tlang={language_code}".format(
+ url=self._url, language_code=language_code
+ ),
self._translation_languages_dict[language_code],
language_code,
True,
@@ -324,16 +357,16 @@ class Transcript(object):
class _TranscriptParser(object):
_FORMATTING_TAGS = [
- 'strong', # important
- 'em', # emphasized
- 'b', # bold
- 'i', # italic
- 'mark', # marked
- 'small', # smaller
- 'del', # deleted
- 'ins', # inserted
- 'sub', # subscript
- 'sup', # superscript
+ "strong", # important
+ "em", # emphasized
+ "b", # bold
+ "i", # italic
+ "mark", # marked
+ "small", # smaller
+ "del", # deleted
+ "ins", # inserted
+ "sub", # subscript
+ "sup", # superscript
]
def __init__(self, preserve_formatting=False):
@@ -341,19 +374,19 @@ class _TranscriptParser(object):
def _get_html_regex(self, preserve_formatting):
if preserve_formatting:
- formats_regex = '|'.join(self._FORMATTING_TAGS)
- formats_regex = r'<\/?(?!\/?(' + formats_regex + r')\b).*?\b>'
+ formats_regex = "|".join(self._FORMATTING_TAGS)
+ formats_regex = r"<\/?(?!\/?(" + formats_regex + r")\b).*?\b>"
html_regex = re.compile(formats_regex, re.IGNORECASE)
else:
- html_regex = re.compile(r'<[^>]*>', re.IGNORECASE)
+ html_regex = re.compile(r"<[^>]*>", re.IGNORECASE)
return html_regex
def parse(self, plain_data):
return [
{
- 'text': re.sub(self._html_regex, '', unescape(xml_element.text)),
- 'start': float(xml_element.attrib['start']),
- 'duration': float(xml_element.attrib.get('dur', '0.0')),
+ "text": re.sub(self._html_regex, "", unescape(xml_element.text)),
+ "start": float(xml_element.attrib["start"]),
+ "duration": float(xml_element.attrib.get("dur", "0.0")),
}
for xml_element in ElementTree.fromstring(plain_data)
if xml_element.text is not None
diff --git a/youtube_transcript_api/formatters.py b/youtube_transcript_api/formatters.py
index 387e565..e693d47 100644
--- a/youtube_transcript_api/formatters.py
+++ b/youtube_transcript_api/formatters.py
@@ -12,12 +12,16 @@ class Formatter(object):
"""
def format_transcript(self, transcript, **kwargs):
- raise NotImplementedError('A subclass of Formatter must implement ' \
- 'their own .format_transcript() method.')
+ raise NotImplementedError(
+ "A subclass of Formatter must implement "
+ "their own .format_transcript() method."
+ )
def format_transcripts(self, transcripts, **kwargs):
- raise NotImplementedError('A subclass of Formatter must implement ' \
- 'their own .format_transcripts() method.')
+ raise NotImplementedError(
+ "A subclass of Formatter must implement "
+ "their own .format_transcripts() method."
+ )
class PrettyPrintFormatter(Formatter):
@@ -68,7 +72,7 @@ class TextFormatter(Formatter):
:return: all transcript text lines separated by newline breaks.'
:rtype str
"""
- return '\n'.join(line['text'] for line in transcript)
+ return "\n".join(line["text"] for line in transcript)
def format_transcripts(self, transcripts, **kwargs):
"""Converts a list of transcripts into plain text with no timestamps.
@@ -77,21 +81,30 @@ class TextFormatter(Formatter):
:return: all transcript text lines separated by newline breaks.'
:rtype str
"""
- return '\n\n\n'.join([self.format_transcript(transcript, **kwargs) for transcript in transcripts])
+ return "\n\n\n".join(
+ [self.format_transcript(transcript, **kwargs) for transcript in transcripts]
+ )
+
class _TextBasedFormatter(TextFormatter):
def _format_timestamp(self, hours, mins, secs, ms):
- raise NotImplementedError('A subclass of _TextBasedFormatter must implement ' \
- 'their own .format_timestamp() method.')
+ raise NotImplementedError(
+ "A subclass of _TextBasedFormatter must implement "
+ "their own .format_timestamp() method."
+ )
def _format_transcript_header(self, lines):
- raise NotImplementedError('A subclass of _TextBasedFormatter must implement ' \
- 'their own _format_transcript_header method.')
+ raise NotImplementedError(
+ "A subclass of _TextBasedFormatter must implement "
+ "their own _format_transcript_header method."
+ )
def _format_transcript_helper(self, i, time_text, line):
- raise NotImplementedError('A subclass of _TextBasedFormatter must implement ' \
- 'their own _format_transcript_helper method.')
-
+ raise NotImplementedError(
+ "A subclass of _TextBasedFormatter must implement "
+ "their own _format_transcript_helper method."
+ )
+
def _seconds_to_timestamp(self, time):
"""Helper that converts `time` into a transcript cue timestamp.
@@ -109,26 +122,27 @@ class _TextBasedFormatter(TextFormatter):
hours_float, remainder = divmod(time, 3600)
mins_float, secs_float = divmod(remainder, 60)
hours, mins, secs = int(hours_float), int(mins_float), int(secs_float)
- ms = int(round((time - int(time))*1000, 2))
+ ms = int(round((time - int(time)) * 1000, 2))
return self._format_timestamp(hours, mins, secs, ms)
def format_transcript(self, transcript, **kwargs):
"""A basic implementation of WEBVTT/SRT formatting.
:param transcript:
- :reference:
+ :reference:
https://www.w3.org/TR/webvtt1/#introduction-caption
https://www.3playmedia.com/blog/create-srt-file/
"""
lines = []
for i, line in enumerate(transcript):
- end = line['start'] + line['duration']
+ end = line["start"] + line["duration"]
time_text = "{} --> {}".format(
- self._seconds_to_timestamp(line['start']),
+ self._seconds_to_timestamp(line["start"]),
self._seconds_to_timestamp(
- transcript[i + 1]['start']
- if i < len(transcript) - 1 and transcript[i + 1]['start'] < end else end
- )
+ transcript[i + 1]["start"]
+ if i < len(transcript) - 1 and transcript[i + 1]["start"] < end
+ else end
+ ),
)
lines.append(self._format_transcript_helper(i, time_text, line))
@@ -138,12 +152,12 @@ class _TextBasedFormatter(TextFormatter):
class SRTFormatter(_TextBasedFormatter):
def _format_timestamp(self, hours, mins, secs, ms):
return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, mins, secs, ms)
-
+
def _format_transcript_header(self, lines):
return "\n\n".join(lines) + "\n"
def _format_transcript_helper(self, i, time_text, line):
- return "{}\n{}\n{}".format(i + 1, time_text, line['text'])
+ return "{}\n{}\n{}".format(i + 1, time_text, line["text"])
class WebVTTFormatter(_TextBasedFormatter):
@@ -154,29 +168,29 @@ class WebVTTFormatter(_TextBasedFormatter):
return "WEBVTT\n\n" + "\n\n".join(lines) + "\n"
def _format_transcript_helper(self, i, time_text, line):
- return "{}\n{}".format(time_text, line['text'])
+ return "{}\n{}".format(time_text, line["text"])
class FormatterLoader(object):
TYPES = {
- 'json': JSONFormatter,
- 'pretty': PrettyPrintFormatter,
- 'text': TextFormatter,
- 'webvtt': WebVTTFormatter,
- 'srt' : SRTFormatter,
+ "json": JSONFormatter,
+ "pretty": PrettyPrintFormatter,
+ "text": TextFormatter,
+ "webvtt": WebVTTFormatter,
+ "srt": SRTFormatter,
}
class UnknownFormatterType(Exception):
def __init__(self, formatter_type):
super(FormatterLoader.UnknownFormatterType, self).__init__(
- 'The format \'{formatter_type}\' is not supported. '
- 'Choose one of the following formats: {supported_formatter_types}'.format(
+ "The format '{formatter_type}' is not supported. "
+ "Choose one of the following formats: {supported_formatter_types}".format(
formatter_type=formatter_type,
- supported_formatter_types=', '.join(FormatterLoader.TYPES.keys()),
+ supported_formatter_types=", ".join(FormatterLoader.TYPES.keys()),
)
)
- def load(self, formatter_type='pretty'):
+ def load(self, formatter_type="pretty"):
"""
Loads the Formatter for the given formatter type.
diff --git a/youtube_transcript_api/test/test_api.py b/youtube_transcript_api/test/test_api.py
index 9b5e732..3d2e48c 100644
--- a/youtube_transcript_api/test/test_api.py
+++ b/youtube_transcript_api/test/test_api.py
@@ -25,8 +25,9 @@ from youtube_transcript_api import (
def load_asset(filename):
- filepath = '{dirname}/assets/{filename}'.format(
- dirname=os.path.dirname(__file__), filename=filename)
+ filepath = "{dirname}/assets/{filename}".format(
+ dirname=os.path.dirname(__file__), filename=filename
+ )
with open(filepath, mode="rb") as file:
return file.read()
@@ -37,13 +38,13 @@ class TestYouTubeTranscriptApi(TestCase):
httpretty.enable()
httpretty.register_uri(
httpretty.GET,
- 'https://www.youtube.com/watch',
- body=load_asset('youtube.html.static')
+ "https://www.youtube.com/watch",
+ body=load_asset("youtube.html.static"),
)
httpretty.register_uri(
httpretty.GET,
- 'https://www.youtube.com/api/timedtext',
- body=load_asset('transcript.xml.static')
+ "https://www.youtube.com/api/timedtext",
+ body=load_asset("transcript.xml.static"),
)
def tearDown(self):
@@ -51,306 +52,362 @@ class TestYouTubeTranscriptApi(TestCase):
httpretty.disable()
def test_get_transcript(self):
- transcript = YouTubeTranscriptApi.get_transcript('GJLlxj_dtq8')
+ transcript = YouTubeTranscriptApi.get_transcript("GJLlxj_dtq8")
self.assertEqual(
transcript,
[
- {'text': 'Hey, this is just a test', 'start': 0.0, 'duration': 1.54},
- {'text': 'this is not the original transcript', 'start': 1.54, 'duration': 4.16},
- {'text': 'just something shorter, I made up for testing', 'start': 5.7, 'duration': 3.239}
- ]
+ {"text": "Hey, this is just a test", "start": 0.0, "duration": 1.54},
+ {
+ "text": "this is not the original transcript",
+ "start": 1.54,
+ "duration": 4.16,
+ },
+ {
+ "text": "just something shorter, I made up for testing",
+ "start": 5.7,
+ "duration": 3.239,
+ },
+ ],
)
def test_get_transcript_formatted(self):
- transcript = YouTubeTranscriptApi.get_transcript('GJLlxj_dtq8', preserve_formatting=True)
+ transcript = YouTubeTranscriptApi.get_transcript(
+ "GJLlxj_dtq8", preserve_formatting=True
+ )
self.assertEqual(
transcript,
[
- {'text': 'Hey, this is just a test', 'start': 0.0, 'duration': 1.54},
- {'text': 'this is not the original transcript', 'start': 1.54, 'duration': 4.16},
- {'text': 'just something shorter, I made up for testing', 'start': 5.7, 'duration': 3.239}
- ]
+ {"text": "Hey, this is just a test", "start": 0.0, "duration": 1.54},
+ {
+ "text": "this is not the original transcript",
+ "start": 1.54,
+ "duration": 4.16,
+ },
+ {
+ "text": "just something shorter, I made up for testing",
+ "start": 5.7,
+ "duration": 3.239,
+ },
+ ],
)
def test_list_transcripts(self):
- transcript_list = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8')
+ transcript_list = YouTubeTranscriptApi.list_transcripts("GJLlxj_dtq8")
language_codes = {transcript.language_code for transcript in transcript_list}
- self.assertEqual(language_codes, {'zh', 'de', 'en', 'hi', 'ja', 'ko', 'es', 'cs', 'en'})
+ self.assertEqual(
+ language_codes, {"zh", "de", "en", "hi", "ja", "ko", "es", "cs", "en"}
+ )
def test_list_transcripts__find_manually_created(self):
- transcript_list = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8')
- transcript = transcript_list.find_manually_created_transcript(['cs'])
+ transcript_list = YouTubeTranscriptApi.list_transcripts("GJLlxj_dtq8")
+ transcript = transcript_list.find_manually_created_transcript(["cs"])
self.assertFalse(transcript.is_generated)
-
def test_list_transcripts__find_generated(self):
- transcript_list = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8')
+ transcript_list = YouTubeTranscriptApi.list_transcripts("GJLlxj_dtq8")
with self.assertRaises(NoTranscriptFound):
- transcript_list.find_generated_transcript(['cs'])
+ transcript_list.find_generated_transcript(["cs"])
- transcript = transcript_list.find_generated_transcript(['en'])
+ transcript = transcript_list.find_generated_transcript(["en"])
self.assertTrue(transcript.is_generated)
def test_list_transcripts__url_as_video_id(self):
httpretty.register_uri(
httpretty.GET,
- 'https://www.youtube.com/watch',
- body=load_asset('youtube_transcripts_disabled.html.static')
+ "https://www.youtube.com/watch",
+ body=load_asset("youtube_transcripts_disabled.html.static"),
)
with self.assertRaises(InvalidVideoId):
- YouTubeTranscriptApi.list_transcripts('https://www.youtube.com/watch?v=GJLlxj_dtq8')
-
+ YouTubeTranscriptApi.list_transcripts(
+ "https://www.youtube.com/watch?v=GJLlxj_dtq8"
+ )
def test_list_transcripts__no_translation_languages_provided(self):
httpretty.register_uri(
httpretty.GET,
- 'https://www.youtube.com/watch',
- body=load_asset('youtube_no_translation_languages.html.static')
+ "https://www.youtube.com/watch",
+ body=load_asset("youtube_no_translation_languages.html.static"),
)
- transcript_list = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8')
+ transcript_list = YouTubeTranscriptApi.list_transcripts("GJLlxj_dtq8")
for transcript in transcript_list:
self.assertEqual(len(transcript.translation_languages), 0)
-
def test_translate_transcript(self):
- transcript = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8').find_transcript(['en'])
+ transcript = YouTubeTranscriptApi.list_transcripts(
+ "GJLlxj_dtq8"
+ ).find_transcript(["en"])
- translated_transcript = transcript.translate('af')
+ translated_transcript = transcript.translate("af")
- self.assertEqual(translated_transcript.language_code, 'af')
- self.assertIn('&tlang=af', translated_transcript._url)
+ self.assertEqual(translated_transcript.language_code, "af")
+ self.assertIn("&tlang=af", translated_transcript._url)
def test_translate_transcript__translation_language_not_available(self):
- transcript = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8').find_transcript(['en'])
+ transcript = YouTubeTranscriptApi.list_transcripts(
+ "GJLlxj_dtq8"
+ ).find_transcript(["en"])
with self.assertRaises(TranslationLanguageNotAvailable):
- transcript.translate('xyz')
+ transcript.translate("xyz")
def test_translate_transcript__not_translatable(self):
- transcript = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8').find_transcript(['en'])
+ transcript = YouTubeTranscriptApi.list_transcripts(
+ "GJLlxj_dtq8"
+ ).find_transcript(["en"])
transcript.translation_languages = []
with self.assertRaises(NotTranslatable):
- transcript.translate('af')
+ transcript.translate("af")
def test_get_transcript__correct_language_is_used(self):
- YouTubeTranscriptApi.get_transcript('GJLlxj_dtq8', ['de', 'en'])
+ YouTubeTranscriptApi.get_transcript("GJLlxj_dtq8", ["de", "en"])
query_string = httpretty.last_request().querystring
- self.assertIn('lang', query_string)
- self.assertEqual(len(query_string['lang']), 1)
- self.assertEqual(query_string['lang'][0], 'de')
+ self.assertIn("lang", query_string)
+ self.assertEqual(len(query_string["lang"]), 1)
+ self.assertEqual(query_string["lang"][0], "de")
def test_get_transcript__fallback_language_is_used(self):
httpretty.register_uri(
httpretty.GET,
- 'https://www.youtube.com/watch',
- body=load_asset('youtube_ww1_nl_en.html.static')
+ "https://www.youtube.com/watch",
+ body=load_asset("youtube_ww1_nl_en.html.static"),
)
- YouTubeTranscriptApi.get_transcript('F1xioXWb8CY', ['de', 'en'])
+ YouTubeTranscriptApi.get_transcript("F1xioXWb8CY", ["de", "en"])
query_string = httpretty.last_request().querystring
- self.assertIn('lang', query_string)
- self.assertEqual(len(query_string['lang']), 1)
- self.assertEqual(query_string['lang'][0], 'en')
+ self.assertIn("lang", query_string)
+ self.assertEqual(len(query_string["lang"]), 1)
+ self.assertEqual(query_string["lang"][0], "en")
def test_get_transcript__create_consent_cookie_if_needed(self):
httpretty.register_uri(
httpretty.GET,
- 'https://www.youtube.com/watch',
- body=load_asset('youtube_consent_page.html.static')
+ "https://www.youtube.com/watch",
+ body=load_asset("youtube_consent_page.html.static"),
)
- YouTubeTranscriptApi.get_transcript('F1xioXWb8CY')
+ YouTubeTranscriptApi.get_transcript("F1xioXWb8CY")
self.assertEqual(len(httpretty.latest_requests()), 3)
for request in httpretty.latest_requests()[1:]:
- self.assertEqual(request.headers['cookie'], 'CONSENT=YES+cb.20210328-17-p0.de+FX+119')
+ self.assertEqual(
+ request.headers["cookie"], "CONSENT=YES+cb.20210328-17-p0.de+FX+119"
+ )
def test_get_transcript__exception_if_create_consent_cookie_failed(self):
httpretty.register_uri(
httpretty.GET,
- 'https://www.youtube.com/watch',
- body=load_asset('youtube_consent_page.html.static')
+ "https://www.youtube.com/watch",
+ body=load_asset("youtube_consent_page.html.static"),
)
httpretty.register_uri(
httpretty.GET,
- 'https://www.youtube.com/watch',
- body=load_asset('youtube_consent_page.html.static')
+ "https://www.youtube.com/watch",
+ body=load_asset("youtube_consent_page.html.static"),
)
with self.assertRaises(FailedToCreateConsentCookie):
- YouTubeTranscriptApi.get_transcript('F1xioXWb8CY')
+ YouTubeTranscriptApi.get_transcript("F1xioXWb8CY")
def test_get_transcript__exception_if_consent_cookie_age_invalid(self):
httpretty.register_uri(
httpretty.GET,
- 'https://www.youtube.com/watch',
- body=load_asset('youtube_consent_page_invalid.html.static')
+ "https://www.youtube.com/watch",
+ body=load_asset("youtube_consent_page_invalid.html.static"),
)
with self.assertRaises(FailedToCreateConsentCookie):
- YouTubeTranscriptApi.get_transcript('F1xioXWb8CY')
+ YouTubeTranscriptApi.get_transcript("F1xioXWb8CY")
def test_get_transcript__exception_if_video_unavailable(self):
httpretty.register_uri(
httpretty.GET,
- 'https://www.youtube.com/watch',
- body=load_asset('youtube_video_unavailable.html.static')
+ "https://www.youtube.com/watch",
+ body=load_asset("youtube_video_unavailable.html.static"),
)
with self.assertRaises(VideoUnavailable):
- YouTubeTranscriptApi.get_transcript('abc')
+ YouTubeTranscriptApi.get_transcript("abc")
def test_get_transcript__exception_if_youtube_request_fails(self):
httpretty.register_uri(
- httpretty.GET,
- 'https://www.youtube.com/watch',
- status=500
+ httpretty.GET, "https://www.youtube.com/watch", status=500
)
with self.assertRaises(YouTubeRequestFailed):
- YouTubeTranscriptApi.get_transcript('abc')
+ YouTubeTranscriptApi.get_transcript("abc")
def test_get_transcript__exception_if_youtube_request_limit_reached(self):
httpretty.register_uri(
httpretty.GET,
- 'https://www.youtube.com/watch',
- body=load_asset('youtube_too_many_requests.html.static')
+ "https://www.youtube.com/watch",
+ body=load_asset("youtube_too_many_requests.html.static"),
)
with self.assertRaises(TooManyRequests):
- YouTubeTranscriptApi.get_transcript('abc')
+ YouTubeTranscriptApi.get_transcript("abc")
def test_get_transcript__exception_if_transcripts_disabled(self):
httpretty.register_uri(
httpretty.GET,
- 'https://www.youtube.com/watch',
- body=load_asset('youtube_transcripts_disabled.html.static')
+ "https://www.youtube.com/watch",
+ body=load_asset("youtube_transcripts_disabled.html.static"),
)
with self.assertRaises(TranscriptsDisabled):
- YouTubeTranscriptApi.get_transcript('dsMFmonKDD4')
+ YouTubeTranscriptApi.get_transcript("dsMFmonKDD4")
httpretty.register_uri(
httpretty.GET,
- 'https://www.youtube.com/watch',
- body=load_asset('youtube_transcripts_disabled2.html.static')
+ "https://www.youtube.com/watch",
+ body=load_asset("youtube_transcripts_disabled2.html.static"),
)
with self.assertRaises(TranscriptsDisabled):
- YouTubeTranscriptApi.get_transcript('Fjg5lYqvzUs')
+ YouTubeTranscriptApi.get_transcript("Fjg5lYqvzUs")
def test_get_transcript__exception_if_language_unavailable(self):
with self.assertRaises(NoTranscriptFound):
- YouTubeTranscriptApi.get_transcript('GJLlxj_dtq8', languages=['cz'])
+ YouTubeTranscriptApi.get_transcript("GJLlxj_dtq8", languages=["cz"])
def test_get_transcript__exception_if_no_transcript_available(self):
httpretty.register_uri(
httpretty.GET,
- 'https://www.youtube.com/watch',
- body=load_asset('youtube_no_transcript_available.html.static')
+ "https://www.youtube.com/watch",
+ body=load_asset("youtube_no_transcript_available.html.static"),
)
with self.assertRaises(NoTranscriptAvailable):
- YouTubeTranscriptApi.get_transcript('MwBPvcYFY2E')
+ YouTubeTranscriptApi.get_transcript("MwBPvcYFY2E")
def test_get_transcript__with_proxy(self):
- proxies = {'http': '', 'https:': ''}
- transcript = YouTubeTranscriptApi.get_transcript(
- 'GJLlxj_dtq8', proxies=proxies
- )
+ proxies = {"http": "", "https:": ""}
+ transcript = YouTubeTranscriptApi.get_transcript("GJLlxj_dtq8", proxies=proxies)
self.assertEqual(
transcript,
[
- {'text': 'Hey, this is just a test', 'start': 0.0, 'duration': 1.54},
- {'text': 'this is not the original transcript', 'start': 1.54, 'duration': 4.16},
- {'text': 'just something shorter, I made up for testing', 'start': 5.7, 'duration': 3.239}
- ]
+ {"text": "Hey, this is just a test", "start": 0.0, "duration": 1.54},
+ {
+ "text": "this is not the original transcript",
+ "start": 1.54,
+ "duration": 4.16,
+ },
+ {
+ "text": "just something shorter, I made up for testing",
+ "start": 5.7,
+ "duration": 3.239,
+ },
+ ],
)
-
+
def test_get_transcript__with_cookies(self):
dirname, filename = os.path.split(os.path.abspath(__file__))
- cookies = dirname + '/example_cookies.txt'
- transcript = YouTubeTranscriptApi.get_transcript('GJLlxj_dtq8', cookies=cookies)
+ cookies = dirname + "/example_cookies.txt"
+ transcript = YouTubeTranscriptApi.get_transcript("GJLlxj_dtq8", cookies=cookies)
self.assertEqual(
transcript,
[
- {'text': 'Hey, this is just a test', 'start': 0.0, 'duration': 1.54},
- {'text': 'this is not the original transcript', 'start': 1.54, 'duration': 4.16},
- {'text': 'just something shorter, I made up for testing', 'start': 5.7, 'duration': 3.239}
- ]
+ {"text": "Hey, this is just a test", "start": 0.0, "duration": 1.54},
+ {
+ "text": "this is not the original transcript",
+ "start": 1.54,
+ "duration": 4.16,
+ },
+ {
+ "text": "just something shorter, I made up for testing",
+ "start": 5.7,
+ "duration": 3.239,
+ },
+ ],
)
def test_get_transcript__assertionerror_if_input_not_string(self):
with self.assertRaises(AssertionError):
- YouTubeTranscriptApi.get_transcript(['video_id_1', 'video_id_2'])
+ YouTubeTranscriptApi.get_transcript(["video_id_1", "video_id_2"])
def test_get_transcripts__assertionerror_if_input_not_list(self):
with self.assertRaises(AssertionError):
- YouTubeTranscriptApi.get_transcripts('video_id_1')
+ YouTubeTranscriptApi.get_transcripts("video_id_1")
- @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript')
+ @patch("youtube_transcript_api.YouTubeTranscriptApi.get_transcript")
def test_get_transcripts(self, mock_get_transcript):
- video_id_1 = 'video_id_1'
- video_id_2 = 'video_id_2'
- languages = ['de', 'en']
+ video_id_1 = "video_id_1"
+ video_id_2 = "video_id_2"
+ languages = ["de", "en"]
- YouTubeTranscriptApi.get_transcripts([video_id_1, video_id_2], languages=languages)
+ YouTubeTranscriptApi.get_transcripts(
+ [video_id_1, video_id_2], languages=languages
+ )
mock_get_transcript.assert_any_call(video_id_1, languages, None, None, False)
mock_get_transcript.assert_any_call(video_id_2, languages, None, None, False)
self.assertEqual(mock_get_transcript.call_count, 2)
- @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript', side_effect=Exception('Error'))
+ @patch(
+ "youtube_transcript_api.YouTubeTranscriptApi.get_transcript",
+ side_effect=Exception("Error"),
+ )
def test_get_transcripts__stop_on_error(self, mock_get_transcript):
with self.assertRaises(Exception):
- YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'])
+ YouTubeTranscriptApi.get_transcripts(["video_id_1", "video_id_2"])
- @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript', side_effect=Exception('Error'))
+ @patch(
+ "youtube_transcript_api.YouTubeTranscriptApi.get_transcript",
+ side_effect=Exception("Error"),
+ )
def test_get_transcripts__continue_on_error(self, mock_get_transcript):
- video_id_1 = 'video_id_1'
- video_id_2 = 'video_id_2'
+ video_id_1 = "video_id_1"
+ video_id_2 = "video_id_2"
- YouTubeTranscriptApi.get_transcripts(['video_id_1', 'video_id_2'], continue_after_error=True)
+ YouTubeTranscriptApi.get_transcripts(
+ ["video_id_1", "video_id_2"], continue_after_error=True
+ )
- mock_get_transcript.assert_any_call(video_id_1, ('en',), None, None, False)
- mock_get_transcript.assert_any_call(video_id_2, ('en',), None, None, False)
-
- @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript')
+ mock_get_transcript.assert_any_call(video_id_1, ("en",), None, None, False)
+ mock_get_transcript.assert_any_call(video_id_2, ("en",), None, None, False)
+
+ @patch("youtube_transcript_api.YouTubeTranscriptApi.get_transcript")
def test_get_transcripts__with_cookies(self, mock_get_transcript):
- cookies = '/example_cookies.txt'
- YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], cookies=cookies)
- mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), None, cookies, False)
+ cookies = "/example_cookies.txt"
+ YouTubeTranscriptApi.get_transcripts(["GJLlxj_dtq8"], cookies=cookies)
+ mock_get_transcript.assert_any_call(
+ "GJLlxj_dtq8", ("en",), None, cookies, False
+ )
- @patch('youtube_transcript_api.YouTubeTranscriptApi.get_transcript')
+ @patch("youtube_transcript_api.YouTubeTranscriptApi.get_transcript")
def test_get_transcripts__with_proxies(self, mock_get_transcript):
- proxies = {'http': '', 'https:': ''}
- YouTubeTranscriptApi.get_transcripts(['GJLlxj_dtq8'], proxies=proxies)
- mock_get_transcript.assert_any_call('GJLlxj_dtq8', ('en',), proxies, None, False)
+ proxies = {"http": "", "https:": ""}
+ YouTubeTranscriptApi.get_transcripts(["GJLlxj_dtq8"], proxies=proxies)
+ mock_get_transcript.assert_any_call(
+ "GJLlxj_dtq8", ("en",), proxies, None, False
+ )
def test_load_cookies(self):
dirname, filename = os.path.split(os.path.abspath(__file__))
- cookies = dirname + '/example_cookies.txt'
- session_cookies = YouTubeTranscriptApi._load_cookies(cookies, 'GJLlxj_dtq8')
- self.assertEqual({'TEST_FIELD': 'TEST_VALUE'}, requests.utils.dict_from_cookiejar(session_cookies))
+ cookies = dirname + "/example_cookies.txt"
+ session_cookies = YouTubeTranscriptApi._load_cookies(cookies, "GJLlxj_dtq8")
+ self.assertEqual(
+ {"TEST_FIELD": "TEST_VALUE"},
+ requests.utils.dict_from_cookiejar(session_cookies),
+ )
def test_load_cookies__bad_file_path(self):
- bad_cookies = 'nonexistent_cookies.txt'
+ bad_cookies = "nonexistent_cookies.txt"
with self.assertRaises(CookiePathInvalid):
- YouTubeTranscriptApi._load_cookies(bad_cookies, 'GJLlxj_dtq8')
+ YouTubeTranscriptApi._load_cookies(bad_cookies, "GJLlxj_dtq8")
def test_load_cookies__no_valid_cookies(self):
dirname, filename = os.path.split(os.path.abspath(__file__))
- expired_cookies = dirname + '/expired_example_cookies.txt'
+ expired_cookies = dirname + "/expired_example_cookies.txt"
with self.assertRaises(CookiesInvalid):
- YouTubeTranscriptApi._load_cookies(expired_cookies, 'GJLlxj_dtq8')
+ YouTubeTranscriptApi._load_cookies(expired_cookies, "GJLlxj_dtq8")
diff --git a/youtube_transcript_api/test/test_cli.py b/youtube_transcript_api/test/test_cli.py
index 26ffabc..623d4a4 100644
--- a/youtube_transcript_api/test/test_cli.py
+++ b/youtube_transcript_api/test/test_cli.py
@@ -10,211 +10,269 @@ from youtube_transcript_api._cli import YouTubeTranscriptCli
class TestYouTubeTranscriptCli(TestCase):
def setUp(self):
self.transcript_mock = MagicMock()
- self.transcript_mock.fetch = MagicMock(return_value=[
- {'text': 'Hey, this is just a test', 'start': 0.0, 'duration': 1.54},
- {'text': 'this is not the original transcript', 'start': 1.54, 'duration': 4.16},
- {'text': 'just something shorter, I made up for testing', 'start': 5.7, 'duration': 3.239}
- ])
+ self.transcript_mock.fetch = MagicMock(
+ return_value=[
+ {"text": "Hey, this is just a test", "start": 0.0, "duration": 1.54},
+ {
+ "text": "this is not the original transcript",
+ "start": 1.54,
+ "duration": 4.16,
+ },
+ {
+ "text": "just something shorter, I made up for testing",
+ "start": 5.7,
+ "duration": 3.239,
+ },
+ ]
+ )
self.transcript_mock.translate = MagicMock(return_value=self.transcript_mock)
self.transcript_list_mock = MagicMock()
- self.transcript_list_mock.find_generated_transcript = MagicMock(return_value=self.transcript_mock)
- self.transcript_list_mock.find_manually_created_transcript = MagicMock(return_value=self.transcript_mock)
- self.transcript_list_mock.find_transcript = MagicMock(return_value=self.transcript_mock)
+ self.transcript_list_mock.find_generated_transcript = MagicMock(
+ return_value=self.transcript_mock
+ )
+ self.transcript_list_mock.find_manually_created_transcript = MagicMock(
+ return_value=self.transcript_mock
+ )
+ self.transcript_list_mock.find_transcript = MagicMock(
+ return_value=self.transcript_mock
+ )
- YouTubeTranscriptApi.list_transcripts = MagicMock(return_value=self.transcript_list_mock)
+ YouTubeTranscriptApi.list_transcripts = MagicMock(
+ return_value=self.transcript_list_mock
+ )
def test_argument_parsing(self):
- parsed_args = YouTubeTranscriptCli('v1 v2 --format json --languages de en'.split())._parse_args()
- self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
- self.assertEqual(parsed_args.format, 'json')
- self.assertEqual(parsed_args.languages, ['de', 'en'])
- self.assertEqual(parsed_args.http_proxy, '')
- self.assertEqual(parsed_args.https_proxy, '')
-
- parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en --format json'.split())._parse_args()
- self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
- self.assertEqual(parsed_args.format, 'json')
- self.assertEqual(parsed_args.languages, ['de', 'en'])
- self.assertEqual(parsed_args.http_proxy, '')
- self.assertEqual(parsed_args.https_proxy, '')
-
- parsed_args = YouTubeTranscriptCli(' --format json v1 v2 --languages de en'.split())._parse_args()
- self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
- self.assertEqual(parsed_args.format, 'json')
- self.assertEqual(parsed_args.languages, ['de', 'en'])
- self.assertEqual(parsed_args.http_proxy, '')
- self.assertEqual(parsed_args.https_proxy, '')
+ parsed_args = YouTubeTranscriptCli(
+ "v1 v2 --format json --languages de en".split()
+ )._parse_args()
+ self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
+ self.assertEqual(parsed_args.format, "json")
+ self.assertEqual(parsed_args.languages, ["de", "en"])
+ self.assertEqual(parsed_args.http_proxy, "")
+ self.assertEqual(parsed_args.https_proxy, "")
parsed_args = YouTubeTranscriptCli(
- 'v1 v2 --languages de en --format json '
- '--http-proxy http://user:pass@domain:port '
- '--https-proxy https://user:pass@domain:port'.split()
+ "v1 v2 --languages de en --format json".split()
)._parse_args()
- self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
- self.assertEqual(parsed_args.format, 'json')
- self.assertEqual(parsed_args.languages, ['de', 'en'])
- self.assertEqual(parsed_args.http_proxy, 'http://user:pass@domain:port')
- self.assertEqual(parsed_args.https_proxy, 'https://user:pass@domain:port')
+ self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
+ self.assertEqual(parsed_args.format, "json")
+ self.assertEqual(parsed_args.languages, ["de", "en"])
+ self.assertEqual(parsed_args.http_proxy, "")
+ self.assertEqual(parsed_args.https_proxy, "")
parsed_args = YouTubeTranscriptCli(
- 'v1 v2 --languages de en --format json --http-proxy http://user:pass@domain:port'.split()
+ " --format json v1 v2 --languages de en".split()
)._parse_args()
- self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
- self.assertEqual(parsed_args.format, 'json')
- self.assertEqual(parsed_args.languages, ['de', 'en'])
- self.assertEqual(parsed_args.http_proxy, 'http://user:pass@domain:port')
- self.assertEqual(parsed_args.https_proxy, '')
+ self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
+ self.assertEqual(parsed_args.format, "json")
+ self.assertEqual(parsed_args.languages, ["de", "en"])
+ self.assertEqual(parsed_args.http_proxy, "")
+ self.assertEqual(parsed_args.https_proxy, "")
parsed_args = YouTubeTranscriptCli(
- 'v1 v2 --languages de en --format json --https-proxy https://user:pass@domain:port'.split()
+ "v1 v2 --languages de en --format json "
+ "--http-proxy http://user:pass@domain:port "
+ "--https-proxy https://user:pass@domain:port".split()
)._parse_args()
- self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
- self.assertEqual(parsed_args.format, 'json')
- self.assertEqual(parsed_args.languages, ['de', 'en'])
- self.assertEqual(parsed_args.https_proxy, 'https://user:pass@domain:port')
- self.assertEqual(parsed_args.http_proxy, '')
+ self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
+ self.assertEqual(parsed_args.format, "json")
+ self.assertEqual(parsed_args.languages, ["de", "en"])
+ self.assertEqual(parsed_args.http_proxy, "http://user:pass@domain:port")
+ self.assertEqual(parsed_args.https_proxy, "https://user:pass@domain:port")
+
+ parsed_args = YouTubeTranscriptCli(
+ "v1 v2 --languages de en --format json --http-proxy http://user:pass@domain:port".split()
+ )._parse_args()
+ self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
+ self.assertEqual(parsed_args.format, "json")
+ self.assertEqual(parsed_args.languages, ["de", "en"])
+ self.assertEqual(parsed_args.http_proxy, "http://user:pass@domain:port")
+ self.assertEqual(parsed_args.https_proxy, "")
+
+ parsed_args = YouTubeTranscriptCli(
+ "v1 v2 --languages de en --format json --https-proxy https://user:pass@domain:port".split()
+ )._parse_args()
+ self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
+ self.assertEqual(parsed_args.format, "json")
+ self.assertEqual(parsed_args.languages, ["de", "en"])
+ self.assertEqual(parsed_args.https_proxy, "https://user:pass@domain:port")
+ self.assertEqual(parsed_args.http_proxy, "")
def test_argument_parsing__only_video_ids(self):
- parsed_args = YouTubeTranscriptCli('v1 v2'.split())._parse_args()
- self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
- self.assertEqual(parsed_args.format, 'pretty')
- self.assertEqual(parsed_args.languages, ['en'])
+ parsed_args = YouTubeTranscriptCli("v1 v2".split())._parse_args()
+ self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
+ self.assertEqual(parsed_args.format, "pretty")
+ self.assertEqual(parsed_args.languages, ["en"])
def test_argument_parsing__video_ids_starting_with_dash(self):
- parsed_args = YouTubeTranscriptCli('\-v1 \-\-v2 \--v3'.split())._parse_args()
- self.assertEqual(parsed_args.video_ids, ['-v1', '--v2', '--v3'])
- self.assertEqual(parsed_args.format, 'pretty')
- self.assertEqual(parsed_args.languages, ['en'])
+ parsed_args = YouTubeTranscriptCli("\-v1 \-\-v2 \--v3".split())._parse_args()
+ self.assertEqual(parsed_args.video_ids, ["-v1", "--v2", "--v3"])
+ self.assertEqual(parsed_args.format, "pretty")
+ self.assertEqual(parsed_args.languages, ["en"])
def test_argument_parsing__fail_without_video_ids(self):
with self.assertRaises(SystemExit):
- YouTubeTranscriptCli('--format json'.split())._parse_args()
+ YouTubeTranscriptCli("--format json".split())._parse_args()
def test_argument_parsing__json(self):
- parsed_args = YouTubeTranscriptCli('v1 v2 --format json'.split())._parse_args()
- self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
- self.assertEqual(parsed_args.format, 'json')
- self.assertEqual(parsed_args.languages, ['en'])
+ parsed_args = YouTubeTranscriptCli("v1 v2 --format json".split())._parse_args()
+ self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
+ self.assertEqual(parsed_args.format, "json")
+ self.assertEqual(parsed_args.languages, ["en"])
- parsed_args = YouTubeTranscriptCli('--format json v1 v2'.split())._parse_args()
- self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
- self.assertEqual(parsed_args.format, 'json')
- self.assertEqual(parsed_args.languages, ['en'])
+ parsed_args = YouTubeTranscriptCli("--format json v1 v2".split())._parse_args()
+ self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
+ self.assertEqual(parsed_args.format, "json")
+ self.assertEqual(parsed_args.languages, ["en"])
def test_argument_parsing__languages(self):
- parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en'.split())._parse_args()
- self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
- self.assertEqual(parsed_args.format, 'pretty')
- self.assertEqual(parsed_args.languages, ['de', 'en'])
+ parsed_args = YouTubeTranscriptCli(
+ "v1 v2 --languages de en".split()
+ )._parse_args()
+ self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
+ self.assertEqual(parsed_args.format, "pretty")
+ self.assertEqual(parsed_args.languages, ["de", "en"])
def test_argument_parsing__proxies(self):
parsed_args = YouTubeTranscriptCli(
- 'v1 v2 --http-proxy http://user:pass@domain:port'.split()
+ "v1 v2 --http-proxy http://user:pass@domain:port".split()
)._parse_args()
- self.assertEqual(parsed_args.http_proxy, 'http://user:pass@domain:port')
+ self.assertEqual(parsed_args.http_proxy, "http://user:pass@domain:port")
parsed_args = YouTubeTranscriptCli(
- 'v1 v2 --https-proxy https://user:pass@domain:port'.split()
+ "v1 v2 --https-proxy https://user:pass@domain:port".split()
)._parse_args()
- self.assertEqual(parsed_args.https_proxy, 'https://user:pass@domain:port')
+ self.assertEqual(parsed_args.https_proxy, "https://user:pass@domain:port")
parsed_args = YouTubeTranscriptCli(
- 'v1 v2 --http-proxy http://user:pass@domain:port --https-proxy https://user:pass@domain:port'.split()
+ "v1 v2 --http-proxy http://user:pass@domain:port --https-proxy https://user:pass@domain:port".split()
)._parse_args()
- self.assertEqual(parsed_args.http_proxy, 'http://user:pass@domain:port')
- self.assertEqual(parsed_args.https_proxy, 'https://user:pass@domain:port')
+ self.assertEqual(parsed_args.http_proxy, "http://user:pass@domain:port")
+ self.assertEqual(parsed_args.https_proxy, "https://user:pass@domain:port")
- parsed_args = YouTubeTranscriptCli(
- 'v1 v2'.split()
- )._parse_args()
- self.assertEqual(parsed_args.http_proxy, '')
- self.assertEqual(parsed_args.https_proxy, '')
+ parsed_args = YouTubeTranscriptCli("v1 v2".split())._parse_args()
+ self.assertEqual(parsed_args.http_proxy, "")
+ self.assertEqual(parsed_args.https_proxy, "")
def test_argument_parsing__list_transcripts(self):
- parsed_args = YouTubeTranscriptCli('--list-transcripts v1 v2'.split())._parse_args()
- self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
+ parsed_args = YouTubeTranscriptCli(
+ "--list-transcripts v1 v2".split()
+ )._parse_args()
+ self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
self.assertTrue(parsed_args.list_transcripts)
- parsed_args = YouTubeTranscriptCli('v1 v2 --list-transcripts'.split())._parse_args()
- self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
+ parsed_args = YouTubeTranscriptCli(
+ "v1 v2 --list-transcripts".split()
+ )._parse_args()
+ self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
self.assertTrue(parsed_args.list_transcripts)
def test_argument_parsing__translate(self):
- parsed_args = YouTubeTranscriptCli('v1 v2 --languages de en --translate cz'.split())._parse_args()
- self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
- self.assertEqual(parsed_args.format, 'pretty')
- self.assertEqual(parsed_args.languages, ['de', 'en'])
- self.assertEqual(parsed_args.translate, 'cz')
+ parsed_args = YouTubeTranscriptCli(
+ "v1 v2 --languages de en --translate cz".split()
+ )._parse_args()
+ self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
+ self.assertEqual(parsed_args.format, "pretty")
+ self.assertEqual(parsed_args.languages, ["de", "en"])
+ self.assertEqual(parsed_args.translate, "cz")
- parsed_args = YouTubeTranscriptCli('v1 v2 --translate cz --languages de en'.split())._parse_args()
- self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
- self.assertEqual(parsed_args.format, 'pretty')
- self.assertEqual(parsed_args.languages, ['de', 'en'])
- self.assertEqual(parsed_args.translate, 'cz')
+ parsed_args = YouTubeTranscriptCli(
+ "v1 v2 --translate cz --languages de en".split()
+ )._parse_args()
+ self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
+ self.assertEqual(parsed_args.format, "pretty")
+ self.assertEqual(parsed_args.languages, ["de", "en"])
+ self.assertEqual(parsed_args.translate, "cz")
def test_argument_parsing__manually_or_generated(self):
- parsed_args = YouTubeTranscriptCli('v1 v2 --exclude-manually-created'.split())._parse_args()
- self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
+ parsed_args = YouTubeTranscriptCli(
+ "v1 v2 --exclude-manually-created".split()
+ )._parse_args()
+ self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
self.assertTrue(parsed_args.exclude_manually_created)
self.assertFalse(parsed_args.exclude_generated)
- parsed_args = YouTubeTranscriptCli('v1 v2 --exclude-generated'.split())._parse_args()
- self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
+ parsed_args = YouTubeTranscriptCli(
+ "v1 v2 --exclude-generated".split()
+ )._parse_args()
+ self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
self.assertFalse(parsed_args.exclude_manually_created)
self.assertTrue(parsed_args.exclude_generated)
- parsed_args = YouTubeTranscriptCli('v1 v2 --exclude-manually-created --exclude-generated'.split())._parse_args()
- self.assertEqual(parsed_args.video_ids, ['v1', 'v2'])
+ parsed_args = YouTubeTranscriptCli(
+ "v1 v2 --exclude-manually-created --exclude-generated".split()
+ )._parse_args()
+ self.assertEqual(parsed_args.video_ids, ["v1", "v2"])
self.assertTrue(parsed_args.exclude_manually_created)
self.assertTrue(parsed_args.exclude_generated)
def test_run(self):
- YouTubeTranscriptCli('v1 v2 --languages de en'.split()).run()
+ YouTubeTranscriptCli("v1 v2 --languages de en".split()).run()
- YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies=None)
- YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None)
+ YouTubeTranscriptApi.list_transcripts.assert_any_call(
+ "v1", proxies=None, cookies=None
+ )
+ YouTubeTranscriptApi.list_transcripts.assert_any_call(
+ "v2", proxies=None, cookies=None
+ )
- self.transcript_list_mock.find_transcript.assert_any_call(['de', 'en'])
+ self.transcript_list_mock.find_transcript.assert_any_call(["de", "en"])
def test_run__failing_transcripts(self):
- YouTubeTranscriptApi.list_transcripts = MagicMock(side_effect=VideoUnavailable('video_id'))
+ YouTubeTranscriptApi.list_transcripts = MagicMock(
+ side_effect=VideoUnavailable("video_id")
+ )
- output = YouTubeTranscriptCli('v1 --languages de en'.split()).run()
+ output = YouTubeTranscriptCli("v1 --languages de en".split()).run()
- self.assertEqual(output, str(VideoUnavailable('video_id')))
+ self.assertEqual(output, str(VideoUnavailable("video_id")))
def test_run__exclude_generated(self):
- YouTubeTranscriptCli('v1 v2 --languages de en --exclude-generated'.split()).run()
+ YouTubeTranscriptCli(
+ "v1 v2 --languages de en --exclude-generated".split()
+ ).run()
- self.transcript_list_mock.find_manually_created_transcript.assert_any_call(['de', 'en'])
+ self.transcript_list_mock.find_manually_created_transcript.assert_any_call(
+ ["de", "en"]
+ )
def test_run__exclude_manually_created(self):
- YouTubeTranscriptCli('v1 v2 --languages de en --exclude-manually-created'.split()).run()
+ YouTubeTranscriptCli(
+ "v1 v2 --languages de en --exclude-manually-created".split()
+ ).run()
- self.transcript_list_mock.find_generated_transcript.assert_any_call(['de', 'en'])
+ self.transcript_list_mock.find_generated_transcript.assert_any_call(
+ ["de", "en"]
+ )
def test_run__exclude_manually_created_and_generated(self):
self.assertEqual(
YouTubeTranscriptCli(
- 'v1 v2 --languages de en --exclude-manually-created --exclude-generated'.split()
+ "v1 v2 --languages de en --exclude-manually-created --exclude-generated".split()
).run(),
- ''
+ "",
)
def test_run__translate(self):
- YouTubeTranscriptCli('v1 v2 --languages de en --translate cz'.split()).run(),
+ YouTubeTranscriptCli("v1 v2 --languages de en --translate cz".split()).run(),
- self.transcript_mock.translate.assert_any_call('cz')
+ self.transcript_mock.translate.assert_any_call("cz")
def test_run__list_transcripts(self):
- YouTubeTranscriptCli('--list-transcripts v1 v2'.split()).run()
+ YouTubeTranscriptCli("--list-transcripts v1 v2".split()).run()
- YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies=None)
- YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies=None)
+ YouTubeTranscriptApi.list_transcripts.assert_any_call(
+ "v1", proxies=None, cookies=None
+ )
+ YouTubeTranscriptApi.list_transcripts.assert_any_call(
+ "v2", proxies=None, cookies=None
+ )
def test_run__json_output(self):
- output = YouTubeTranscriptCli('v1 v2 --languages de en --format json'.split()).run()
+ output = YouTubeTranscriptCli(
+ "v1 v2 --languages de en --format json".split()
+ ).run()
# will fail if output is not valid json
json.loads(output)
@@ -222,31 +280,37 @@ class TestYouTubeTranscriptCli(TestCase):
def test_run__proxies(self):
YouTubeTranscriptCli(
(
- 'v1 v2 --languages de en '
- '--http-proxy http://user:pass@domain:port '
- '--https-proxy https://user:pass@domain:port'
+ "v1 v2 --languages de en "
+ "--http-proxy http://user:pass@domain:port "
+ "--https-proxy https://user:pass@domain:port"
).split()
).run()
YouTubeTranscriptApi.list_transcripts.assert_any_call(
- 'v1',
- proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'},
- cookies= None
+ "v1",
+ proxies={
+ "http": "http://user:pass@domain:port",
+ "https": "https://user:pass@domain:port",
+ },
+ cookies=None,
)
YouTubeTranscriptApi.list_transcripts.assert_any_call(
- 'v2',
- proxies={'http': 'http://user:pass@domain:port', 'https': 'https://user:pass@domain:port'},
- cookies=None
+ "v2",
+ proxies={
+ "http": "http://user:pass@domain:port",
+ "https": "https://user:pass@domain:port",
+ },
+ cookies=None,
)
def test_run__cookies(self):
YouTubeTranscriptCli(
- (
- 'v1 v2 --languages de en '
- '--cookies blahblah.txt'
- ).split()
+ ("v1 v2 --languages de en " "--cookies blahblah.txt").split()
).run()
- YouTubeTranscriptApi.list_transcripts.assert_any_call('v1', proxies=None, cookies='blahblah.txt')
- YouTubeTranscriptApi.list_transcripts.assert_any_call('v2', proxies=None, cookies='blahblah.txt')
-
+ YouTubeTranscriptApi.list_transcripts.assert_any_call(
+ "v1", proxies=None, cookies="blahblah.txt"
+ )
+ YouTubeTranscriptApi.list_transcripts.assert_any_call(
+ "v2", proxies=None, cookies="blahblah.txt"
+ )
diff --git a/youtube_transcript_api/test/test_formatters.py b/youtube_transcript_api/test/test_formatters.py
index b0b3ba2..7eda79a 100644
--- a/youtube_transcript_api/test/test_formatters.py
+++ b/youtube_transcript_api/test/test_formatters.py
@@ -10,16 +10,17 @@ from youtube_transcript_api.formatters import (
TextFormatter,
SRTFormatter,
WebVTTFormatter,
- PrettyPrintFormatter, FormatterLoader
+ PrettyPrintFormatter,
+ FormatterLoader,
)
class TestFormatters(TestCase):
def setUp(self):
self.transcript = [
- {'text': 'Test line 1', 'start': 0.0, 'duration': 1.50},
- {'text': 'line between', 'start': 1.5, 'duration': 2.0},
- {'text': 'testing the end line', 'start': 2.5, 'duration': 3.25}
+ {"text": "Test line 1", "start": 0.0, "duration": 1.50},
+ {"text": "line between", "start": 1.5, "duration": 2.0},
+ {"text": "testing the end line", "start": 2.5, "duration": 3.25},
]
self.transcripts = [self.transcript, self.transcript]
@@ -31,27 +32,27 @@ class TestFormatters(TestCase):
def test_srt_formatter_starting(self):
content = SRTFormatter().format_transcript(self.transcript)
- lines = content.split('\n')
+ lines = content.split("\n")
# test starting lines
self.assertEqual(lines[0], "1")
self.assertEqual(lines[1], "00:00:00,000 --> 00:00:01,500")
-
+
def test_srt_formatter_middle(self):
content = SRTFormatter().format_transcript(self.transcript)
- lines = content.split('\n')
+ lines = content.split("\n")
# test middle lines
self.assertEqual(lines[4], "2")
self.assertEqual(lines[5], "00:00:01,500 --> 00:00:02,500")
- self.assertEqual(lines[6], self.transcript[1]['text'])
+ self.assertEqual(lines[6], self.transcript[1]["text"])
def test_srt_formatter_ending(self):
content = SRTFormatter().format_transcript(self.transcript)
- lines = content.split('\n')
+ lines = content.split("\n")
# test ending lines
- self.assertEqual(lines[-2], self.transcript[-1]['text'])
+ self.assertEqual(lines[-2], self.transcript[-1]["text"])
self.assertEqual(lines[-1], "")
def test_srt_formatter_many(self):
@@ -59,22 +60,25 @@ class TestFormatters(TestCase):
content = formatter.format_transcripts(self.transcripts)
formatted_single_transcript = formatter.format_transcript(self.transcript)
- self.assertEqual(content, formatted_single_transcript + '\n\n\n' + formatted_single_transcript)
+ self.assertEqual(
+ content,
+ formatted_single_transcript + "\n\n\n" + formatted_single_transcript,
+ )
def test_webvtt_formatter_starting(self):
content = WebVTTFormatter().format_transcript(self.transcript)
- lines = content.split('\n')
+ lines = content.split("\n")
# test starting lines
self.assertEqual(lines[0], "WEBVTT")
self.assertEqual(lines[1], "")
-
+
def test_webvtt_formatter_ending(self):
content = WebVTTFormatter().format_transcript(self.transcript)
- lines = content.split('\n')
+ lines = content.split("\n")
# test ending lines
- self.assertEqual(lines[-2], self.transcript[-1]['text'])
+ self.assertEqual(lines[-2], self.transcript[-1]["text"])
self.assertEqual(lines[-1], "")
def test_webvtt_formatter_many(self):
@@ -82,7 +86,10 @@ class TestFormatters(TestCase):
content = formatter.format_transcripts(self.transcripts)
formatted_single_transcript = formatter.format_transcript(self.transcript)
- self.assertEqual(content, formatted_single_transcript + '\n\n\n' + formatted_single_transcript)
+ self.assertEqual(
+ content,
+ formatted_single_transcript + "\n\n\n" + formatted_single_transcript,
+ )
def test_pretty_print_formatter(self):
content = PrettyPrintFormatter().format_transcript(self.transcript)
@@ -106,7 +113,7 @@ class TestFormatters(TestCase):
def test_text_formatter(self):
content = TextFormatter().format_transcript(self.transcript)
- lines = content.split('\n')
+ lines = content.split("\n")
self.assertEqual(lines[0], self.transcript[0]["text"])
self.assertEqual(lines[-1], self.transcript[-1]["text"])
@@ -116,11 +123,14 @@ class TestFormatters(TestCase):
content = formatter.format_transcripts(self.transcripts)
formatted_single_transcript = formatter.format_transcript(self.transcript)
- self.assertEqual(content, formatted_single_transcript + '\n\n\n' + formatted_single_transcript)
+ self.assertEqual(
+ content,
+ formatted_single_transcript + "\n\n\n" + formatted_single_transcript,
+ )
def test_formatter_loader(self):
loader = FormatterLoader()
- formatter = loader.load('json')
+ formatter = loader.load("json")
self.assertTrue(isinstance(formatter, JSONFormatter))
@@ -132,4 +142,4 @@ class TestFormatters(TestCase):
def test_formatter_loader__unknown_format(self):
with self.assertRaises(FormatterLoader.UnknownFormatterType):
- FormatterLoader().load('png')
+ FormatterLoader().load("png")