From eda8ddb38f229369447b463834d5d3c0b773536f Mon Sep 17 00:00:00 2001
From: "E. Seiver" <5547078+eseiver@users.noreply.github.com>
Date: Wed, 12 Apr 2023 14:29:19 -0700
Subject: [PATCH] Compute _html_regex once as an instance attribute of _TranscriptParser

Also rename TEXT_FORMATS -> _FORMATTING_TAGS.
---
 youtube_transcript_api/_transcripts.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/youtube_transcript_api/_transcripts.py b/youtube_transcript_api/_transcripts.py
index 59d2f4c..32e0fc4 100644
--- a/youtube_transcript_api/_transcripts.py
+++ b/youtube_transcript_api/_transcripts.py
@@ -27,7 +27,7 @@ from ._errors import (
 )
 from ._settings import WATCH_URL
 
-TEXT_FORMATS = [
+_FORMATTING_TAGS = [
     'strong',  # important
     'em',  # emphasized
     'b',  # bold
@@ -341,11 +341,11 @@ class Transcript(object):
 class _TranscriptParser(object):
     def __init__(self, preserve_formatting=False):
         self.preserve_formatting = preserve_formatting
+        self._html_regex = self.get_html_regex()
 
-    @property
-    def html_regex(self):
+    def get_html_regex(self):
         if self.preserve_formatting:
-            formats_regex = '|'.join(TEXT_FORMATS)
+            formats_regex = '|'.join(_FORMATTING_TAGS)
             formats_regex = r'<\/?(?!\/?(' + formats_regex + r')\b).*?\b>'
             html_regex = re.compile(formats_regex, re.IGNORECASE)
         else:
@@ -355,7 +355,7 @@ class _TranscriptParser(object):
     def parse(self, plain_data):
         return [
             {
-                'text': re.sub(self.html_regex, '', unescape(xml_element.text)),
+                'text': re.sub(self._html_regex, '', unescape(xml_element.text)),
                 'start': float(xml_element.attrib['start']),
                 'duration': float(xml_element.attrib.get('dur', '0.0')),
             }