Merge pull request #28 from jdepoix/bugfix/ISSUE-27

Adjusted to changes in the YouTube web client.
This commit is contained in:
jdepoix 2019-10-07 18:49:46 +02:00 committed by GitHub
commit 8c2ce71765
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 8 additions and 1 deletion

View File

@@ -99,6 +99,7 @@ class _TranscriptFetcher():
WATCH_URL = 'https://www.youtube.com/watch?v={video_id}' WATCH_URL = 'https://www.youtube.com/watch?v={video_id}'
API_BASE_URL = 'https://www.youtube.com/api/{api_url}' API_BASE_URL = 'https://www.youtube.com/api/{api_url}'
LANGUAGE_REGEX = re.compile(r'(&lang=.*&)|(&lang=.*)') LANGUAGE_REGEX = re.compile(r'(&lang=.*&)|(&lang=.*)')
TIMEDTEXT_STRING = 'timedtext'
def __init__(self, video_id, languages, proxies): def __init__(self, video_id, languages, proxies):
self.video_id = video_id self.video_id = video_id
@@ -110,7 +111,13 @@ class _TranscriptFetcher():
fetched_site = requests.get(self.WATCH_URL.format(video_id=self.video_id), proxies=self.proxies).text fetched_site = requests.get(self.WATCH_URL.format(video_id=self.video_id), proxies=self.proxies).text
else: else:
fetched_site = requests.get(self.WATCH_URL.format(video_id=self.video_id)).text fetched_site = requests.get(self.WATCH_URL.format(video_id=self.video_id)).text
timedtext_url_start = fetched_site.find('timedtext') timedtext_splits = fetched_site.split(self.TIMEDTEXT_STRING)
timedtext_url_start = (
timedtext_splits[2].find(self.TIMEDTEXT_STRING)
+ len(timedtext_splits[0])
+ len(timedtext_splits[1])
+ len(self.TIMEDTEXT_STRING) + 1
)
for language in (self.languages if self.languages else [None,]): for language in (self.languages if self.languages else [None,]):
response = self._execute_api_request(fetched_site, timedtext_url_start, language) response = self._execute_api_request(fetched_site, timedtext_url_start, language)