From cdb7b90b821ad0b012dee385f22512e3b2c40696 Mon Sep 17 00:00:00 2001
From: Jonas Depoix
Date: Mon, 11 Mar 2019 14:41:26 +0100
Subject: [PATCH] improved param parsing and CLI support for choosing languages

---
 youtube_transcript_api/__main__.py | 56 +++++++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 5 deletions(-)

diff --git a/youtube_transcript_api/__main__.py b/youtube_transcript_api/__main__.py
index 37bd7bb..205358a 100644
--- a/youtube_transcript_api/__main__.py
+++ b/youtube_transcript_api/__main__.py
@@ -6,18 +6,64 @@ from pprint import pprint
 
 import logging
 
+import argparse
+
 from ._api import YouTubeTranscriptApi
 
 
+def parse_args(args):
+    """Parse the command line arguments of the CLI.
+
+    :param args: argument strings without the program name (e.g. sys.argv[1:])
+    :return: namespace with the attributes video_ids, languages and json
+    """
+    parser = argparse.ArgumentParser(
+        description=(
+            'This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. '
+            'It also works for automatically generated subtitles and it does not require a headless browser, like '
+            'other selenium based solutions do!'
+        )
+    )
+    parser.add_argument('video_ids', nargs='*', type=str, help='List of YouTube video IDs.')
+    parser.add_argument(
+        '--languages',
+        nargs='*',
+        default=[],
+        type=str,
+        help=(
+            'A list of language codes in a descending priority. For example, if this is set to "de en" it will first '
+            'try to fetch the german transcript (de) and then fetch the english transcipt (en) if it fails to do so. '
+            'As I can\'t provide a complete list of all working language codes with full certainty, you may have to '
+            'play around with the language codes a bit, to find the one which is working for you!'
+        ),
+    )
+    parser.add_argument(
+        '--json',
+        action='store_const',
+        const=True,
+        default=False,
+        help='If this flag is set the output will be JSON formatted.',
+    )
+
+    return parser.parse_args(args)
+
+
 def main():
+    """Fetch the transcripts for the video IDs passed on the command line and print them."""
     logging.basicConfig()
 
-    if len(sys.argv) <= 1:
-        print('No YouTube video id was found')
-    elif sys.argv[1] == '--json':
-        print(json.dumps(YouTubeTranscriptApi.get_transcripts(sys.argv[2:], continue_after_error=True)[0]))
+    # sys.argv[0] is the program name; passing it on would make argparse treat it as a video ID
+    parsed_args = parse_args(sys.argv[1:])
+    transcripts, _ = YouTubeTranscriptApi.get_transcripts(
+        parsed_args.video_ids,
+        languages=parsed_args.languages,
+        continue_after_error=True
+    )
+
+    if parsed_args.json:
+        print(json.dumps(transcripts))
     else:
-        pprint(YouTubeTranscriptApi.get_transcripts(sys.argv[1:], continue_after_error=True)[0])
+        pprint(transcripts)
 
 
 if __name__ == '__main__':