156 lines
5.3 KiB
Python
156 lines
5.3 KiB
Python
import argparse
|
|
|
|
from ._api import YouTubeTranscriptApi
|
|
|
|
from .formatters import FormatterLoader
|
|
|
|
|
|
class YouTubeTranscriptCli(object):
|
|
def __init__(self, args):
|
|
self._args = args
|
|
|
|
def run(self):
|
|
parsed_args = self._parse_args()
|
|
|
|
if parsed_args.exclude_manually_created and parsed_args.exclude_generated:
|
|
return ""
|
|
|
|
proxies = None
|
|
if parsed_args.http_proxy != "" or parsed_args.https_proxy != "":
|
|
proxies = {"http": parsed_args.http_proxy, "https": parsed_args.https_proxy}
|
|
|
|
cookies = parsed_args.cookies
|
|
|
|
transcripts = []
|
|
exceptions = []
|
|
|
|
for video_id in parsed_args.video_ids:
|
|
try:
|
|
transcripts.append(
|
|
self._fetch_transcript(parsed_args, proxies, cookies, video_id)
|
|
)
|
|
except Exception as exception:
|
|
exceptions.append(exception)
|
|
|
|
return "\n\n".join(
|
|
[str(exception) for exception in exceptions]
|
|
+ (
|
|
[
|
|
FormatterLoader()
|
|
.load(parsed_args.format)
|
|
.format_transcripts(transcripts)
|
|
]
|
|
if transcripts
|
|
else []
|
|
)
|
|
)
|
|
|
|
def _fetch_transcript(self, parsed_args, proxies, cookies, video_id):
|
|
transcript_list = YouTubeTranscriptApi.list_transcripts(
|
|
video_id, proxies=proxies, cookies=cookies
|
|
)
|
|
|
|
if parsed_args.list_transcripts:
|
|
return str(transcript_list)
|
|
|
|
if parsed_args.exclude_manually_created:
|
|
transcript = transcript_list.find_generated_transcript(
|
|
parsed_args.languages
|
|
)
|
|
elif parsed_args.exclude_generated:
|
|
transcript = transcript_list.find_manually_created_transcript(
|
|
parsed_args.languages
|
|
)
|
|
else:
|
|
transcript = transcript_list.find_transcript(parsed_args.languages)
|
|
|
|
if parsed_args.translate:
|
|
transcript = transcript.translate(parsed_args.translate)
|
|
|
|
return transcript.fetch()
|
|
|
|
def _parse_args(self):
|
|
parser = argparse.ArgumentParser(
|
|
description=(
|
|
"This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. "
|
|
"It also works for automatically generated subtitles and it does not require a headless browser, like "
|
|
"other selenium based solutions do!"
|
|
)
|
|
)
|
|
parser.add_argument(
|
|
"--list-transcripts",
|
|
action="store_const",
|
|
const=True,
|
|
default=False,
|
|
help="This will list the languages in which the given videos are available in.",
|
|
)
|
|
parser.add_argument(
|
|
"video_ids", nargs="+", type=str, help="List of YouTube video IDs."
|
|
)
|
|
parser.add_argument(
|
|
"--languages",
|
|
nargs="*",
|
|
default=[
|
|
"en",
|
|
],
|
|
type=str,
|
|
help=(
|
|
'A list of language codes in a descending priority. For example, if this is set to "de en" it will '
|
|
"first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails "
|
|
"to do so. As I can't provide a complete list of all working language codes with full certainty, you "
|
|
"may have to play around with the language codes a bit, to find the one which is working for you!"
|
|
),
|
|
)
|
|
parser.add_argument(
|
|
"--exclude-generated",
|
|
action="store_const",
|
|
const=True,
|
|
default=False,
|
|
help="If this flag is set transcripts which have been generated by YouTube will not be retrieved.",
|
|
)
|
|
parser.add_argument(
|
|
"--exclude-manually-created",
|
|
action="store_const",
|
|
const=True,
|
|
default=False,
|
|
help="If this flag is set transcripts which have been manually created will not be retrieved.",
|
|
)
|
|
parser.add_argument(
|
|
"--format",
|
|
type=str,
|
|
default="pretty",
|
|
choices=tuple(FormatterLoader.TYPES.keys()),
|
|
)
|
|
parser.add_argument(
|
|
"--translate",
|
|
default="",
|
|
help=(
|
|
"The language code for the language you want this transcript to be translated to. Use the "
|
|
"--list-transcripts feature to find out which languages are translatable and which translation "
|
|
"languages are available."
|
|
),
|
|
)
|
|
parser.add_argument(
|
|
"--http-proxy",
|
|
default="",
|
|
metavar="URL",
|
|
help="Use the specified HTTP proxy.",
|
|
)
|
|
parser.add_argument(
|
|
"--https-proxy",
|
|
default="",
|
|
metavar="URL",
|
|
help="Use the specified HTTPS proxy.",
|
|
)
|
|
parser.add_argument(
|
|
"--cookies",
|
|
default=None,
|
|
help="The cookie file that will be used for authorization with youtube.",
|
|
)
|
|
|
|
return self._sanitize_video_ids(parser.parse_args(self._args))
|
|
|
|
def _sanitize_video_ids(self, args):
|
|
args.video_ids = [video_id.replace("\\", "") for video_id in args.video_ids]
|
|
return args
|