From 16f5bbc464602773e61eeafef51d1dbc47987bb4 Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 10 Jul 2024 18:20:59 +0100 Subject: [PATCH] [YouTube] Fix nsig processing for player `b22ef6e7` * improve extraction of function name (like yt-dlp/yt-dlp#10390) * always use JSInterp to extract function code (yt-dlp/yt-dlp#10396, thx seproDev, pukkandan) --- test/test_youtube_signature.py | 4 ++++ youtube_dl/extractor/youtube.py | 24 ++++++++++-------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index cafba7a5c..cc18d0f7b 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -162,6 +162,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js', '1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg', ), + ( + 'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js', + 'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A', + ), ] diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 90c16e172..2e31a8979 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1636,7 +1636,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): try: jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url) except ExtractorError as e: - raise ExtractorError('Unable to extract nsig jsi, player_id, func_codefunction code', cause=e) + raise ExtractorError('Unable to extract nsig function code', cause=e) if self.get_param('youtube_print_sig_code'): self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format( player_id, func_code[1])) @@ -1658,8 +1658,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_n_function_name(self, jscode): func_name, idx = self._search_regex( - r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?\([\w$]+\)', - jscode, 'Initial JS player n function name', group=('nfunc', 'idx')) + # new: 
(b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c) + # old: .get("n"))&&(b=nfunc[idx](b) + # older: .get("n"))&&(b=nfunc(b) + r'''(?x) + (?:\(\s*(?P<b>[a-z])\s*=\s*String\s*\.\s*fromCharCode\s*\(\s*110\s*\)\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)? + \.\s*get\s*\(\s*(?(b)(?P=b)|"n")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s* + (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\) + ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx')) if not idx: return func_name @@ -1679,17 +1685,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): func_name = self._extract_n_function_name(jscode) - # For redundancy - func_code = self._search_regex( - r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s* - # NB: The end of the regex is intentionally kept strict - {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name, - jscode, 'nsig function', group=('var', 'code'), default=None) - if func_code: - func_code = ([func_code[0]], func_code[1]) - else: - self.write_debug('Extracting nsig function with jsinterp') - func_code = jsi.extract_function_code(func_name) + func_code = jsi.extract_function_code(func_name) self.cache.store('youtube-nsig', player_id, func_code) return jsi, player_id, func_code