Make _html_regex an instance attribute of _TranscriptParser, computed once in __init__

also rename TEXT_FORMATS -> _FORMATTING_TAGS
2023-04-12 14:29:19 -07:00 · 2023-04-12 14:29:19 -07:00 · eda8ddb38f
parent 72e9781528
commit eda8ddb38f
1 changed file with 5 additions and 5 deletions
--- a/youtube_transcript_api/_transcripts.py
+++ b/youtube_transcript_api/_transcripts.py
@@ -27,7 +27,7 @@ from ._errors import (
 )
 from ._settings import WATCH_URL
-TEXT_FORMATS = [
+_FORMATTING_TAGS = [
    'strong',  # important
    'em',  # emphasized
    'b',  # bold
@@ -341,11 +341,11 @@ class Transcript(object):
 class _TranscriptParser(object):
    def __init__(self, preserve_formatting=False):
        self.preserve_formatting = preserve_formatting
+        self._html_regex = self.get_html_regex()
-    @property
+    def get_html_regex(self):
-    def html_regex(self):
        if self.preserve_formatting:
-            formats_regex = '|'.join(TEXT_FORMATS)
+            formats_regex = '|'.join(_FORMATTING_TAGS)
            formats_regex = r'<\/?(?!\/?(' + formats_regex + r')\b).*?\b>'
            html_regex = re.compile(formats_regex, re.IGNORECASE)
        else:
@@ -355,7 +355,7 @@ class _TranscriptParser(object):
    def parse(self, plain_data):
        return [
            {
-                'text': re.sub(self.html_regex, '', unescape(xml_element.text)),
+                'text': re.sub(self._html_regex, '', unescape(xml_element.text)),
                'start': float(xml_element.attrib['start']),
                'duration': float(xml_element.attrib.get('dur', '0.0')),
            }