Merge pull request #9 from jdepoix/feature/ISSUE-8

Feature/issue 8
This commit is contained in:
jdepoix 2019-03-11 14:55:54 +01:00 committed by GitHub
commit 4e228d9978
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 53 additions and 8 deletions

View File

@ -54,7 +54,7 @@ You can also add the `languages` param if you want to make sure the transcripts
YouTubeTranscriptApi.get_transcripts(video_ids, languages=['de', 'en']) YouTubeTranscriptApi.get_transcripts(video_ids, languages=['de', 'en'])
``` ```
It's a list of language codes in a descending priority. In this example it will first try to fetch the german transcript (`'de'`) and then fetch the english transcipt (`'en'`) if it fails to do so. As I can't provide a complete list of all working language codes with full certainty, you may have to play around with the language codes a bit, to find the one which is working for you! It's a list of language codes in a descending priority. In this example it will first try to fetch the german transcript (`'de'`) and then fetch the english transcript (`'en'`) if it fails to do so. As I can't provide a complete list of all working language codes with full certainty, you may have to play around with the language codes a bit, to find the one which is working for you!
To get transcripts for a list of video ids you can call: To get transcripts for a list of video ids you can call:
@ -72,10 +72,16 @@ Execute the CLI script using the video ids as parameters and the results will be
youtube_transcript_api <first_video_id> <second_video_id> ... youtube_transcript_api <first_video_id> <second_video_id> ...
``` ```
The CLI also gives you the option to provide a list of preferred languages:
```
youtube_transcript_api <first_video_id> <second_video_id> ... --languages de en
```
If you would prefer to write it into a file or pipe it into another application, you can also output the results as json using the following line: If you would prefer to write it into a file or pipe it into another application, you can also output the results as json using the following line:
``` ```
youtube_transcript_api --json <first_video_id> <second_video_id> ... > transcripts.json youtube_transcript_api <first_video_id> <second_video_id> ... --languages de en --json > transcripts.json
``` ```
## Warning ## Warning

View File

@ -11,7 +11,7 @@ def get_long_description():
setuptools.setup( setuptools.setup(
name="youtube_transcript_api", name="youtube_transcript_api",
version="0.1.2", version="0.1.3",
author="Jonas Depoix", author="Jonas Depoix",
author_email="jonas.depoix@web.de", author_email="jonas.depoix@web.de",
description="This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. It also works for automatically generated subtitles and it does not require a headless browser, like other selenium based solutions do!", description="This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. It also works for automatically generated subtitles and it does not require a headless browser, like other selenium based solutions do!",

View File

@ -6,18 +6,57 @@ from pprint import pprint
import logging import logging
import argparse
from ._api import YouTubeTranscriptApi from ._api import YouTubeTranscriptApi
def parse_args(args):
    """Parse the command line arguments of the transcript CLI.

    Args:
        args: List of argument strings, without the program name
            (``main`` passes ``sys.argv[1:]``).

    Returns:
        An ``argparse.Namespace`` with three attributes:
        ``video_ids`` (list of str, empty if none were given),
        ``languages`` (list of str, defaults to an empty list) and
        ``json`` (bool, ``True`` only if ``--json`` was passed).
    """
    parser = argparse.ArgumentParser(
        description=(
            'This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. '
            'It also works for automatically generated subtitles and it does not require a headless browser, like '
            'other selenium based solutions do!'
        )
    )
    # nargs='*' keeps the positional optional, so calling without ids still parses.
    parser.add_argument('video_ids', nargs='*', type=str, help='List of YouTube video IDs.')
    parser.add_argument(
        '--languages',
        nargs='*',
        default=[],
        type=str,
        help=(
            'A list of language codes in a descending priority. For example, if this is set to "de en" it will first '
            'try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails to do so. '
            'As I can\'t provide a complete list of all working language codes with full certainty, you may have to '
            'play around with the language codes a bit, to find the one which is working for you!'
        ),
    )
    # store_true is the idiomatic spelling of store_const/const=True/default=False.
    parser.add_argument(
        '--json',
        action='store_true',
        help='If this flag is set the output will be JSON formatted.',
    )

    return parser.parse_args(args)
def main():
    """Entry point of the CLI: fetch transcripts and print them.

    Reads the arguments from ``sys.argv``, requests the transcripts for all
    given video ids (continuing past ids that fail) and prints the result
    either as JSON or pretty-printed, depending on the ``--json`` flag.
    """
    logging.basicConfig()

    options = parse_args(sys.argv[1:])

    # get_transcripts returns a pair; only the first element is printed.
    transcripts, _ = YouTubeTranscriptApi.get_transcripts(
        options.video_ids,
        languages=options.languages,
        continue_after_error=True,
    )

    if not options.json:
        pprint(transcripts)
        return

    print(json.dumps(transcripts))
if __name__ == '__main__': if __name__ == '__main__':