added translate feature; added iterator to TranscriptList

This commit is contained in:
Jonas Depoix 2019-12-16 16:58:26 +01:00
parent a1b1e001fe
commit 409141ab51
2 changed files with 38 additions and 25 deletions

View File

@ -43,6 +43,14 @@ class TranscriptsDisabled(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = 'Subtitles are disabled for this video' CAUSE_MESSAGE = 'Subtitles are disabled for this video'
class NotTranslatable(CouldNotRetrieveTranscript):
    """
    Raised when a translation is requested for a transcript which does not offer any translation languages.
    """
    CAUSE_MESSAGE = 'The requested language is not translatable'
class TranslationLanguageNotAvailable(CouldNotRetrieveTranscript):
    """
    Raised when the requested target language code is not among the translation languages of a transcript.
    """
    CAUSE_MESSAGE = 'The requested translation language is not available'
class NoTranscriptFound(CouldNotRetrieveTranscript): class NoTranscriptFound(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = ( CAUSE_MESSAGE = (
'No transcripts were found for any of the requested language codes: {requested_language_codes}\n\n' 'No transcripts were found for any of the requested language codes: {requested_language_codes}\n\n'

View File

@ -12,7 +12,9 @@ from xml.etree import ElementTree
import re import re
from ._html_unescaping import unescape from ._html_unescaping import unescape
from ._errors import VideoUnavailable, NoTranscriptFound, TranscriptsDisabled from ._errors import (
VideoUnavailable, NoTranscriptFound, TranscriptsDisabled, NotTranslatable, TranslationLanguageNotAvailable
)
from ._settings import WATCH_URL from ._settings import WATCH_URL
@ -53,9 +55,6 @@ class TranscriptList():
This object represents a list of transcripts. It can be iterated over to list all transcripts which are available This object represents a list of transcripts. It can be iterated over to list all transcripts which are available
for a given YouTube video. Also it provides functionality to search for a transcript in a given language. for a given YouTube video. Also it provides functionality to search for a transcript in a given language.
""" """
# TODO implement iterator
def __init__(self, video_id, manually_created_transcripts, generated_transcripts): def __init__(self, video_id, manually_created_transcripts, generated_transcripts):
""" """
The constructor is only for internal use. Use the static build method instead. The constructor is only for internal use. Use the static build method instead.
@ -117,6 +116,9 @@ class TranscriptList():
generated_transcripts, generated_transcripts,
) )
def __iter__(self):
return iter(list(self._manually_created_transcripts.values()) + list(self._generated_transcripts.values()))
def find_transcript(self, language_codes): def find_transcript(self, language_codes):
""" """
Finds a transcript for a given language code. Manually created transcripts are returned first and only if none Finds a transcript for a given language code. Manually created transcripts are returned first and only if none
@ -220,6 +222,10 @@ class Transcript():
self.language_code = language_code self.language_code = language_code
self.is_generated = is_generated self.is_generated = is_generated
self.translation_languages = translation_languages self.translation_languages = translation_languages
self._translation_languages_dict = {
translation_language['language_code']: translation_language['language']
for translation_language in translation_languages
}
def fetch(self): def fetch(self):
""" """
@ -238,27 +244,26 @@ class Transcript():
language_code=self.language_code, language_code=self.language_code,
) )
# TODO integrate translations in future release @property
# @property def is_translatable(self):
# def is_translatable(self): return len(self.translation_languages) > 0
# return len(self.translation_languages) > 0
# def translate(self, language_code):
# if not self.is_translatable:
# class TranslatableTranscript(Transcript): raise NotTranslatable(self.video_id)
# def __init__(self, http_client, url, translation_languages):
# super(TranslatableTranscript, self).__init__(http_client, url) if language_code not in self._translation_languages_dict:
# self._translation_languages = translation_languages raise TranslationLanguageNotAvailable(self.video_id)
# self._translation_language_codes = {language['language_code'] for language in translation_languages}
# return Transcript(
# self._http_client,
# def translate(self, language_code): self.video_id,
# if language_code not in self._translation_language_codes: '{url}&tlang={language_code}'.format(url=self._url, language_code=language_code),
# raise TranslatableTranscript.TranslationLanguageNotAvailable() self._translation_languages_dict[language_code],
# language_code,
# return Transcript( True,
# self._http_client, [],
# '{url}&tlang={language_code}'.format(url=self._url, language_code=language_code) )
# )
class _TranscriptParser(): class _TranscriptParser():