From ceaa2c78fa17c3d1f89f7cc6481ce7d2bf4512fe Mon Sep 17 00:00:00 2001 From: 01001110 Date: Fri, 24 Mar 2023 16:22:13 +0800 Subject: [PATCH] [chelseafc] improve regex --- youtube_dl/extractor/chelseafc.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/chelseafc.py b/youtube_dl/extractor/chelseafc.py index 5402ca801..5c3234243 100644 --- a/youtube_dl/extractor/chelseafc.py +++ b/youtube_dl/extractor/chelseafc.py @@ -43,12 +43,17 @@ class ChelseafcIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - raw_data = self._html_search_regex( - # TODO improve regex - r'(?:]+(?:data-component="VideoDetails".*?)+data-props="([^"]*))', + video_details_div = self._search_regex( + r'(]*\sdata-component\s*=\s*(?:"|\')\s*VideoDetails\s*(?:"|\')[^>]*>)', webpage, + 'div' + ) + raw_data = self._html_search_regex( + r']*\sdata-props\s*=\s*(?:"|\')\s*([^"\']*)\s*(?:"|\')[^>]*>', + video_details_div, 'data' ) + data = json.loads(raw_data)['videoDetail'] manifest_url = data['signedUrl']