Merge a4759ae8ea into d55d1f423d

[YouTube] Always extract using MWEB API client
* temporary fix-up for 403 on download * MWEB parameters from yt-dlp 2024-12-06
2024-12-22 07:56:49 +00:00 · 2024-12-21 10:50:30 +01:00 · 2024-12-16 12:38:51 +00:00 · 2024-12-16 12:38:51 +00:00 · 2024-12-16 12:38:51 +00:00 · 2024-12-16 12:38:51 +00:00
6 changed files with 882 additions and 388 deletions
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# coding: utf-8

 from __future__ import unicode_literals

@ -11,7 +12,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import math
 import re

-from youtube_dl.compat import compat_str
+from youtube_dl.compat import compat_str as str
 from youtube_dl.jsinterp import JS_Undefined, JSInterpreter

 NaN = object()
@ -19,7 +20,7 @@ NaN = object()

 class TestJSInterpreter(unittest.TestCase):
    def _test(self, jsi_or_code, expected, func='f', args=()):
-        if isinstance(jsi_or_code, compat_str):
+        if isinstance(jsi_or_code, str):
            jsi_or_code = JSInterpreter(jsi_or_code)
        got = jsi_or_code.call_function(func, *args)
        if expected is NaN:
@ -40,16 +41,27 @@ class TestJSInterpreter(unittest.TestCase):
        self._test('function f(){return 42 + 7;}', 49)
        self._test('function f(){return 42 + undefined;}', NaN)
        self._test('function f(){return 42 + null;}', 42)
+        self._test('function f(){return 1 + "";}', '1')
+        self._test('function f(){return 42 + "7";}', '427')
+        self._test('function f(){return false + true;}', 1)
+        self._test('function f(){return "false" + true;}', 'falsetrue')
+        self._test('function f(){return '
+                   '1 + "2" + [3,4] + {k: 56} + null + undefined + Infinity;}',
+                   '123,4[object Object]nullundefinedInfinity')

    def test_sub(self):
        self._test('function f(){return 42 - 7;}', 35)
        self._test('function f(){return 42 - undefined;}', NaN)
        self._test('function f(){return 42 - null;}', 42)
+        self._test('function f(){return 42 - "7";}', 35)
+        self._test('function f(){return 42 - "spam";}', NaN)

    def test_mul(self):
        self._test('function f(){return 42 * 7;}', 294)
        self._test('function f(){return 42 * undefined;}', NaN)
        self._test('function f(){return 42 * null;}', 0)
+        self._test('function f(){return 42 * "7";}', 294)
+        self._test('function f(){return 42 * "eggs";}', NaN)

    def test_div(self):
        jsi = JSInterpreter('function f(a, b){return a / b;}')
@ -57,17 +69,26 @@ class TestJSInterpreter(unittest.TestCase):
        self._test(jsi, NaN, args=(JS_Undefined, 1))
        self._test(jsi, float('inf'), args=(2, 0))
        self._test(jsi, 0, args=(0, 3))
+        self._test(jsi, 6, args=(42, 7))
+        self._test(jsi, 0, args=(42, float('inf')))
+        self._test(jsi, 6, args=("42", 7))
+        self._test(jsi, NaN, args=("spam", 7))

    def test_mod(self):
        self._test('function f(){return 42 % 7;}', 0)
        self._test('function f(){return 42 % 0;}', NaN)
        self._test('function f(){return 42 % undefined;}', NaN)
+        self._test('function f(){return 42 % "7";}', 0)
+        self._test('function f(){return 42 % "beans";}', NaN)

    def test_exp(self):
        self._test('function f(){return 42 ** 2;}', 1764)
        self._test('function f(){return 42 ** undefined;}', NaN)
        self._test('function f(){return 42 ** null;}', 1)
+        self._test('function f(){return undefined ** 0;}', 1)
        self._test('function f(){return undefined ** 42;}', NaN)
+        self._test('function f(){return 42 ** "2";}', 1764)
+        self._test('function f(){return 42 ** "spam";}', NaN)

    def test_calc(self):
        self._test('function f(a){return 2*a+1;}', 7, args=[3])
@ -89,7 +110,35 @@ class TestJSInterpreter(unittest.TestCase):
        self._test('function f(){return 19 & 21;}', 17)
        self._test('function f(){return 11 >> 2;}', 2)
        self._test('function f(){return []? 2+3: 4;}', 5)
+        # equality
+        self._test('function f(){return 1 == 1}', True)
+        self._test('function f(){return 1 == 1.0}', True)
+        self._test('function f(){return 1 == "1"}', True)
        self._test('function f(){return 1 == 2}', False)
+        self._test('function f(){return 1 != "1"}', False)
+        self._test('function f(){return 1 != 2}', True)
+        self._test('function f(){var x = {a: 1}; var y = x; return x == y}', True)
+        self._test('function f(){var x = {a: 1}; return x == {a: 1}}', False)
+        self._test('function f(){return NaN == NaN}', False)
+        self._test('function f(){return null == undefined}', True)
+        self._test('function f(){return "spam, eggs" == "spam, eggs"}', True)
+        # strict equality
+        self._test('function f(){return 1 === 1}', True)
+        self._test('function f(){return 1 === 1.0}', True)
+        self._test('function f(){return 1 === "1"}', False)
+        self._test('function f(){return 1 === 2}', False)
+        self._test('function f(){var x = {a: 1}; var y = x; return x === y}', True)
+        self._test('function f(){var x = {a: 1}; return x === {a: 1}}', False)
+        self._test('function f(){return NaN === NaN}', False)
+        self._test('function f(){return null === undefined}', False)
+        self._test('function f(){return null === null}', True)
+        self._test('function f(){return undefined === undefined}', True)
+        self._test('function f(){return "uninterned" === "uninterned"}', True)
+        self._test('function f(){return 1 === 1}', True)
+        self._test('function f(){return 1 === "1"}', False)
+        self._test('function f(){return 1 !== 1}', False)
+        self._test('function f(){return 1 !== "1"}', True)
+        # expressions
        self._test('function f(){return 0 && 1 || 2;}', 2)
        self._test('function f(){return 0 ?? 42;}', 0)
        self._test('function f(){return "life, the universe and everything" < 42;}', False)
@ -111,7 +160,6 @@ class TestJSInterpreter(unittest.TestCase):
        self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51)
        self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11)

-    @unittest.skip('Not yet fully implemented')
    def test_comments(self):
        self._test('''
            function f() {
@ -130,6 +178,15 @@ class TestJSInterpreter(unittest.TestCase):
            }
        ''', 3)

+        self._test('''
+            function f() {
+                var x = ( /* 1 + */ 2 +
+                          /* 30 * 40 */
+                          50);
+                return x;
+            }
+        ''', 52)
+
    def test_precedence(self):
        self._test('''
            function f() {
@ -266,7 +323,20 @@ class TestJSInterpreter(unittest.TestCase):
        self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5)

    def test_void(self):
-        self._test('function f() { return void 42; }', None)
+        self._test('function f() { return void 42; }', JS_Undefined)
+
+    def test_typeof(self):
+        self._test('function f() { return typeof undefined; }', 'undefined')
+        self._test('function f() { return typeof NaN; }', 'number')
+        self._test('function f() { return typeof Infinity; }', 'number')
+        self._test('function f() { return typeof true; }', 'boolean')
+        self._test('function f() { return typeof null; }', 'object')
+        self._test('function f() { return typeof "a string"; }', 'string')
+        self._test('function f() { return typeof 42; }', 'number')
+        self._test('function f() { return typeof 42.42; }', 'number')
+        self._test('function f() { var g = function(){}; return typeof g; }', 'function')
+        self._test('function f() { return typeof {key: "value"}; }', 'object')
+        # not yet implemented: Symbol, BigInt

    def test_return_function(self):
        jsi = JSInterpreter('''
@ -324,6 +394,16 @@ class TestJSInterpreter(unittest.TestCase):
        self._test('function f() { let a; return a?.qq; }', JS_Undefined)
        self._test('function f() { let a = {m1: 42, m2: 0 }; return a?.qq; }', JS_Undefined)

+    def test_indexing(self):
+        self._test('function f() { return [1, 2, 3, 4][3]}', 4)
+        self._test('function f() { return [1, [2, [3, [4]]]][1][1][1][0]}', 4)
+        self._test('function f() { var o = {1: 2, 3: 4}; return o[3]}', 4)
+        self._test('function f() { var o = {1: 2, 3: 4}; return o["3"]}', 4)
+        self._test('function f() { return [1, [2, {3: [4]}]][1][1]["3"][0]}', 4)
+        self._test('function f() { return [1, 2, 3, 4].length}', 4)
+        self._test('function f() { var o = {1: 2, 3: 4}; return o.length}', JS_Undefined)
+        self._test('function f() { var o = {1: 2, 3: 4}; o["length"] = 42; return o.length}', 42)
+
    def test_regex(self):
        self._test('function f() { let a=/,,[/,913,/](,)}/; }', None)

@ -411,6 +491,13 @@ class TestJSInterpreter(unittest.TestCase):
            self._test(jsi, 't-e-s-t', args=[test_input, '-'])
            self._test(jsi, '', args=[[], '-'])

+        self._test('function f(){return '
+                   '[1, 1.0, "abc", {a: 1}, null, undefined, Infinity, NaN].join()}',
+                   '1,1,abc,[object Object],,,Infinity,NaN')
+        self._test('function f(){return '
+                   '[1, 1.0, "abc", {a: 1}, null, undefined, Infinity, NaN].join("~")}',
+                   '1~1~abc~[object Object]~~~Infinity~NaN')
+
    def test_split(self):
        test_result = list('test')
        tests = [
@ -424,6 +511,18 @@ class TestJSInterpreter(unittest.TestCase):
            self._test(jsi, test_result, args=['t-e-s-t', '-'])
            self._test(jsi, [''], args=['', '-'])
            self._test(jsi, [], args=['', ''])
+        # RegExp split
+        self._test('function f(){return "test".split(/(?:)/)}',
+                   ['t', 'e', 's', 't'])
+        self._test('function f(){return "t-e-s-t".split(/[es-]+/)}',
+                   ['t', 't'])
+        # from MDN: surrogate pairs aren't handled: case 1 fails
+        # self._test('function f(){return "😄😄".split(/(?:)/)}',
+        #            ['\ud83d', '\ude04', '\ud83d', '\ude04'])
+        # case 2 beats Py3.2: it gets the case 1 result
+        if sys.version_info >= (2, 6) and not ((3, 0) <= sys.version_info < (3, 3)):
+            self._test('function f(){return "😄😄".split(/(?:)/u)}',
+                       ['😄', '😄'])

    def test_slice(self):
        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
@ -453,6 +552,40 @@ class TestJSInterpreter(unittest.TestCase):
        self._test('function f(){return "012345678".slice(-1, 1)}', '')
        self._test('function f(){return "012345678".slice(-3, -1)}', '67')

+    def test_pop(self):
+        # pop
+        self._test('function f(){var a = [0, 1, 2, 3, 4, 5, 6, 7, 8]; return [a.pop(), a]}',
+                   [8, [0, 1, 2, 3, 4, 5, 6, 7]])
+        self._test('function f(){return [].pop()}', JS_Undefined)
+        # push
+        self._test('function f(){var a = [0, 1, 2]; return [a.push(3, 4), a]}',
+                   [5, [0, 1, 2, 3, 4]])
+        self._test('function f(){var a = [0, 1, 2]; return [a.push(), a]}',
+                   [3, [0, 1, 2]])
+
+    def test_shift(self):
+        # shift
+        self._test('function f(){var a = [0, 1, 2, 3, 4, 5, 6, 7, 8]; return [a.shift(), a]}',
+                   [0, [1, 2, 3, 4, 5, 6, 7, 8]])
+        self._test('function f(){return [].shift()}', JS_Undefined)
+        # unshift
+        self._test('function f(){var a = [0, 1, 2]; return [a.unshift(3, 4), a]}',
+                   [5, [3, 4, 0, 1, 2]])
+        self._test('function f(){var a = [0, 1, 2]; return [a.unshift(), a]}',
+                   [3, [0, 1, 2]])
+
+    def test_forEach(self):
+        self._test('function f(){var ret = []; var l = [4, 2]; '
+                   'var log = function(e,i,a){ret.push([e,i,a]);}; '
+                   'l.forEach(log); '
+                   'return [ret.length, ret[0][0], ret[1][1], ret[0][2]]}',
+                   [2, 4, 1, [4, 2]])
+        self._test('function f(){var ret = []; var l = [4, 2]; '
+                   'var log = function(e,i,a){this.push([e,i,a]);}; '
+                   'l.forEach(log, ret); '
+                   'return [ret.length, ret[0][0], ret[1][1], ret[0][2]]}',
+                   [2, 4, 1, [4, 2]])
+

 if __name__ == '__main__':
    unittest.main()
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# coding: utf-8

 from __future__ import unicode_literals

@ -12,6 +13,7 @@ import re
 import string

 from youtube_dl.compat import (
+    compat_contextlib_suppress,
    compat_open as open,
    compat_str,
    compat_urlretrieve,
@ -50,23 +52,38 @@ _SIG_TESTS = [
    (
        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
        84,
-        '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
+        '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>',
    ),
    (
        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
        83,
-        '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
+        '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F',
    ),
    (
        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
        '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
-        '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
+        '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B',
    ),
    (
        'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
        '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
        '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
-    )
+    ),
+    (
+        'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',
+        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+        'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
+    ),
+    (
+        'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
+        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+        'MyOSJXtKI3m-uME_jv7-pT12gOFC02RFkGoqWpzE0Cs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+    ),
+    (
+        'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
+        '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
+        '0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xxAj7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJ2OySqa0q',
+    ),
 ]

 _NSIG_TESTS = [
@ -142,6 +159,10 @@ _NSIG_TESTS = [
        'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
        'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
    ),
+    (
+        'https://www.youtube.com/s/player/7a062b77/player_ias.vflset/en_US/base.js',
+        'NRcE3y3mVtm_cV-W', 'VbsCYUATvqlt5w',
+    ),
    (
        'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
        'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
@ -154,6 +175,10 @@ _NSIG_TESTS = [
        'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
        'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
    ),
+    (
+        'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
+        '1wWCVpRR96eAmMI87L', 'KSkWAVv1ZQxC3A',
+    ),
    (
        'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
        '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
@ -182,6 +207,18 @@ _NSIG_TESTS = [
        'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
        'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
    ),
+    (
+        'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
+        'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ',
+    ),
+    (
+        'https://www.youtube.com/s/player/f8f53e1a/player_ias.vflset/en_US/base.js',
+        'VTQOUOv0mCIeJ7i8kZB', 'kcfD8wy0sNLyNQ',
+    ),
+    (
+        'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
+        'YWt1qdbe8SAfkoPHW5d', 'RrRjWQOJmBiP',
+    ),
 ]


@ -216,11 +253,9 @@ class TestSignature(unittest.TestCase):
            os.mkdir(self.TESTDATA_DIR)

    def tearDown(self):
-        try:
+        with compat_contextlib_suppress(OSError):
            for f in os.listdir(self.TESTDATA_DIR):
                os.remove(f)
-        except OSError:
-            pass


 def t_factory(name, sig_func, url_pattern):
@ -254,11 +289,12 @@ def signature(jscode, sig_input):

 def n_sig(jscode, sig_input):
    funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
-    return JSInterpreter(jscode).call_function(funcname, sig_input)
+    return JSInterpreter(jscode).call_function(
+        funcname, sig_input, _ytdl_do_not_return=sig_input)


 make_sig_test = t_factory(
-    'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
+    'signature', signature, re.compile(r'.*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$'))
 for test_spec in _SIG_TESTS:
    make_sig_test(*test_spec)

--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@ -4,208 +4,272 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-    compat_urlparse,
-)
+from ..compat import compat_str
 from ..utils import (
    ExtractorError,
+    GeoRestrictedError,
    int_or_none,
-    qualities,
+    merge_dicts,
+    parse_iso8601,
+    parse_qs,
    strip_or_none,
-    try_get,
-    unified_strdate,
+    traverse_obj,
    url_or_none,
+    urljoin,
 )


 class ArteTVBaseIE(InfoExtractor):
    _ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
-    _API_BASE = 'https://api.arte.tv/api/player/v1'
+    _API_BASE = 'https://api.arte.tv/api/player/v2'
+
+    # yt-dlp shims
+
+    @classmethod
+    def _match_valid_url(cls, url):
+        return re.match(cls._VALID_URL, url)
+
+    def _extract_m3u8_formats_and_subtitles(self, *args, **kwargs):
+        return self._extract_m3u8_formats(*args, **kwargs), {}


 class ArteTVIE(ArteTVBaseIE):
    _VALID_URL = r'''(?x)
-                    https?://
+                    (?:https?://
                        (?:
                            (?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
                            api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
                        )
-                        /(?P<id>\d{6}-\d{3}-[AF])
+                    |arte://program)
+                        /(?P<id>\d{6}-\d{3}-[AF]|LIVE)
                    ''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
-        'info_dict': {
-            'id': '088501-000-A',
-            'ext': 'mp4',
-            'title': 'Mexico: Stealing Petrol to Survive',
-            'upload_date': '20190628',
-        },
+        'only_matching': True,
    }, {
        'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
-        'only_matching': True,
+        'info_dict': {
+            'id': '100103-000-A',
+            'title': 'USA: Dyskryminacja na porodówce',
+            'description': 'md5:242017b7cce59ffae340a54baefcafb1',
+            'alt_title': 'ARTE Reportage',
+            'timestamp': 1604417980,
+            'upload_date': '20201103',
+            'duration': 554,
+            # test format sort
+            'height': 720,
+            'thumbnail': r're:https://api-cdn\.arte\.tv/.+940x530',
+            'ext': 'mp4',
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': 'm3u8',
+        },
+    }, {
+        'note': 'No alt_title',
+        'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/',
+        'info_dict': {
+            'id': '110371-000-A',
+            'ext': 'mp4',
+            'upload_date': '20220718',
+            'duration': 154,
+            'timestamp': 1658162460,
+            'description': 'md5:5890f36fe7dccfadb8b7c0891de54786',
+            'title': 'La chaleur, supplice des arbres de rue',
+            'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/CPE2sQDtD8GLQgt8DuYHLf/940x530',
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': 'm3u8',
+        },
    }, {
        'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
        'only_matching': True,
+    }, {
+        'url': 'https://api.arte.tv/api/player/v2/config/de/LIVE',
+        'only_matching': True,
    }]

-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        lang = mobj.group('lang') or mobj.group('lang_2')
+    _GEO_BYPASS = True

-        info = self._download_json(
-            '%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
-        player_info = info['videoJsonPlayer']
-
-        vsr = try_get(player_info, lambda x: x['VSR'], dict)
-        if not vsr:
-            error = None
-            if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
-                error = try_get(
-                    player_info, lambda x: x['custom_msg']['msg'], compat_str)
-            if not error:
-                error = 'Video %s is not available' % player_info.get('VID') or video_id
-            raise ExtractorError(error, expected=True)
-
-        upload_date_str = player_info.get('shootingDate')
-        if not upload_date_str:
-            upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
-
-        title = (player_info.get('VTI') or player_info['VID']).strip()
-        subtitle = player_info.get('VSU', '').strip()
-        if subtitle:
-            title += ' - %s' % subtitle
-
-        qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
-
-        LANGS = {
-            'fr': 'F',
+    _LANG_MAP = {  # ISO639 -> French abbreviations
+        'fr': 'F',
        'de': 'A',
        'en': 'E[ANG]',
        'es': 'E[ESP]',
        'it': 'E[ITA]',
        'pl': 'E[POL]',
+        # XXX: probably means mixed; <https://www.arte.tv/en/videos/107710-029-A/dispatches-from-ukraine-local-journalists-report/>
+        # uses this code for audio that happens to be in Ukrainian, but the manifest uses the ISO code 'mul' (mixed)
+        'mul': 'EU',
    }

-        langcode = LANGS.get(lang, lang)
+    _VERSION_CODE_RE = re.compile(r'''(?x)
+        V
+        (?P<original_voice>O?)
+        (?P<vlang>[FA]|E\[[A-Z]+\]|EU)?
+        (?P<audio_desc>AUD|)
+        (?:
+            (?P<has_sub>-ST)
+            (?P<sdh_sub>M?)
+            (?P<sub_lang>[FA]|E\[[A-Z]+\]|EU)
+        )?
+    ''')

-        formats = []
-        for format_id, format_dict in vsr.items():
-            f = dict(format_dict)
-            format_url = url_or_none(f.get('url'))
-            streamer = f.get('streamer')
-            if not format_url and not streamer:
-                continue
-            versionCode = f.get('versionCode')
-            l = re.escape(langcode)
+    # all obtained by exhaustive testing
+    _COUNTRIES_MAP = {
+        'DE_FR': (
+            'BL', 'DE', 'FR', 'GF', 'GP', 'MF', 'MQ', 'NC',
+            'PF', 'PM', 'RE', 'WF', 'YT',
+        ),
+        # with both of the below 'BE' sometimes works, sometimes doesn't
+        'EUR_DE_FR': (
+            'AT', 'BL', 'CH', 'DE', 'FR', 'GF', 'GP', 'LI',
+            'MC', 'MF', 'MQ', 'NC', 'PF', 'PM', 'RE', 'WF',
+            'YT',
+        ),
+        'SAT': (
+            'AD', 'AT', 'AX', 'BG', 'BL', 'CH', 'CY', 'CZ',
+            'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GF',
+            'GR', 'HR', 'HU', 'IE', 'IS', 'IT', 'KN', 'LI',
+            'LT', 'LU', 'LV', 'MC', 'MF', 'MQ', 'MT', 'NC',
+            'NL', 'NO', 'PF', 'PL', 'PM', 'PT', 'RE', 'RO',
+            'SE', 'SI', 'SK', 'SM', 'VA', 'WF', 'YT',
+        ),
+    }

-            # Language preference from most to least priority
-            # Reference: section 6.8 of
-            # https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf
-            PREFERENCES = (
-                # original version in requested language, without subtitles
-                r'VO{0}$'.format(l),
-                # original version in requested language, with partial subtitles in requested language
-                r'VO{0}-ST{0}$'.format(l),
-                # original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
-                r'VO{0}-STM{0}$'.format(l),
-                # non-original (dubbed) version in requested language, without subtitles
-                r'V{0}$'.format(l),
-                # non-original (dubbed) version in requested language, with subtitles partial subtitles in requested language
-                r'V{0}-ST{0}$'.format(l),
-                # non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
-                r'V{0}-STM{0}$'.format(l),
-                # original version in requested language, with partial subtitles in different language
-                r'VO{0}-ST(?!{0}).+?$'.format(l),
-                # original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
-                r'VO{0}-STM(?!{0}).+?$'.format(l),
-                # original version in different language, with partial subtitles in requested language
-                r'VO(?:(?!{0}).+?)?-ST{0}$'.format(l),
-                # original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
-                r'VO(?:(?!{0}).+?)?-STM{0}$'.format(l),
-                # original version in different language, without subtitles
-                r'VO(?:(?!{0}))?$'.format(l),
-                # original version in different language, with partial subtitles in different language
-                r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(l),
-                # original version in different language, with subtitles for the deaf and hard-of-hearing in different language
-                r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(l),
-            )
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        video_id = mobj.group('id')
+        lang = mobj.group('lang') or mobj.group('lang_2')
+        language_code = self._LANG_MAP.get(lang)
+
+        config = self._download_json('{0}/config/{1}/{2}'.format(self._API_BASE, lang, video_id), video_id)
+
+        geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {}
+        if geoblocking.get('restrictedArea'):
+            raise GeoRestrictedError('Video restricted to {0!r}'.format(geoblocking['code']),
+                                     countries=self._COUNTRIES_MAP.get(geoblocking['code'], ('DE', 'FR')))
+
+        if not traverse_obj(config, ('data', 'attributes', 'rights')):
+            # Eg: https://www.arte.tv/de/videos/097407-215-A/28-minuten
+            # Eg: https://www.arte.tv/es/videos/104351-002-A/serviteur-du-peuple-1-23
+            raise ExtractorError(
+                'Video is not available in this language edition of Arte or broadcast rights expired', expected=True)
+
+        formats, subtitles = [], {}
+        secondary_formats = []
+        for stream in config['data']['attributes']['streams']:
+            # official player contains code like `e.get("versions")[0].eStat.ml5`
+            stream_version = stream['versions'][0]
+            stream_version_code = stream_version['eStat']['ml5']

-            for pref, p in enumerate(PREFERENCES):
-                if re.match(p, versionCode):
-                    lang_pref = len(PREFERENCES) - pref
-                    break
-            else:
            lang_pref = -1
+            m = self._VERSION_CODE_RE.match(stream_version_code)
+            if m:
+                lang_pref = int(''.join('01'[x] for x in (
+                    m.group('vlang') == language_code,      # we prefer voice in the requested language
+                    not m.group('audio_desc'),              # and not the audio description version
+                    bool(m.group('original_voice')),        # but if voice is not in the requested language, at least choose the original voice
+                    m.group('sub_lang') == language_code,   # if subtitles are present, we prefer them in the requested language
+                    not m.group('has_sub'),                 # but we prefer no subtitles otherwise
+                    not m.group('sdh_sub'),                 # and we prefer not the hard-of-hearing subtitles if there are subtitles
+                )))

-            media_type = f.get('mediaType')
-            if media_type == 'hls':
-                m3u8_formats = self._extract_m3u8_formats(
-                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id=format_id, fatal=False)
-                for m3u8_format in m3u8_formats:
-                    m3u8_format['language_preference'] = lang_pref
-                formats.extend(m3u8_formats)
-                continue
-
-            format = {
-                'format_id': format_id,
-                'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
+            short_label = traverse_obj(stream_version, 'shortLabel', expected_type=compat_str, default='?')
+            if stream['protocol'].startswith('HLS'):
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                    stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False)
+                for fmt in fmts:
+                    fmt.update({
+                        'format_note': '{0} [{1}]'.format(stream_version.get("label", "unknown"), short_label),
                        'language_preference': lang_pref,
-                'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
-                'width': int_or_none(f.get('width')),
-                'height': int_or_none(f.get('height')),
-                'tbr': int_or_none(f.get('bitrate')),
-                'quality': qfunc(f.get('quality')),
-            }
-
-            if media_type == 'rtmp':
-                format['url'] = f['streamer']
-                format['play_path'] = 'mp4:' + f['url']
-                format['ext'] = 'flv'
+                    })
+                if any(map(short_label.startswith, ('cc', 'OGsub'))):
+                    secondary_formats.extend(fmts)
                else:
-                format['url'] = f['url']
+                    formats.extend(fmts)
+                for sub in subs:
+                    subtitles = self._merge_subtitles(subtitles, sub)

-            formats.append(format)
+            elif stream['protocol'] in ('HTTPS', 'RTMP'):
+                formats.append({
+                    'format_id': '{0}-{1}'.format(stream["protocol"], stream_version_code),
+                    'url': stream['url'],
+                    'format_note': '{0} [{1}]'.format(stream_version.get("label", "unknown"), short_label),
+                    'language_preference': lang_pref,
+                    # 'ext': 'mp4',  # XXX: may or may not be necessary, at least for HTTPS
+                })

+            else:
+                self.report_warning('Skipping stream with unknown protocol {0}'.format(stream["protocol"]))
+
+            # TODO: chapters from stream['segments']?
+            # The JS also looks for chapters in config['data']['attributes']['chapters'],
+            # but I am yet to find a video having those
+
+        formats.extend(secondary_formats)
+        self._remove_duplicate_formats(formats)
        self._sort_formats(formats)

+        metadata = config['data']['attributes']['metadata']
+
        return {
-            'id': player_info.get('VID') or video_id,
-            'title': title,
-            'description': player_info.get('VDE'),
-            'upload_date': unified_strdate(upload_date_str),
-            'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
+            'id': metadata['providerId'],
+            'webpage_url': traverse_obj(metadata, ('link', 'url')),
+            'title': traverse_obj(metadata, 'subtitle', 'title'),
+            'alt_title': metadata.get('subtitle') and metadata.get('title'),
+            'description': metadata.get('description'),
+            'duration': traverse_obj(metadata, ('duration', 'seconds')),
+            'language': metadata.get('language'),
+            'timestamp': traverse_obj(config, ('data', 'attributes', 'rights', 'begin'), expected_type=parse_iso8601),
+            'is_live': config['data']['attributes'].get('live', False),
            'formats': formats,
+            'subtitles': subtitles,
+            'thumbnails': [
+                {'url': image['url'], 'id': image.get('caption')}
+                for image in metadata.get('images') or [] if url_or_none(image.get('url'))
+            ],
        }


 class ArteTVEmbedIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
+    _EMBED_REGEX = [r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1']
    _TESTS = [{
        'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
+        'only_matching': True,
+        'skip': 'Video is not available in this language edition of Arte or broadcast rights expired'
+    }, {
+        'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fpl%2F100103-000-A&lang=pl&autoplay=true&mute=100103-000-A',
        'info_dict': {
-            'id': '100605-013-A',
+            'id': '100103-000-A',
            'ext': 'mp4',
-            'title': 'United we Stream November Lockdown Edition #13',
-            'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
-            'upload_date': '20201116',
+            'title': 'USA: Dyskryminacja na porodówce',
+            'timestamp': 1604417980,
+            'upload_date': '20201103',
+            'description': 'md5:242017b7cce59ffae340a54baefcafb1',
+            'duration': 554,
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
        'only_matching': True,
    }]

-    @staticmethod
-    def _extract_urls(webpage):
-        return [url for _, url in re.findall(
-            r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
-            webpage)]
+    @classmethod
+    def _extract_urls(cls, webpage):
+        """Return a list of all embed URLs matched in `webpage`.
+
+        Every pattern in `cls._EMBED_REGEX` is tried in order; each match
+        yields a (quote, url) pair, of which only the URL is kept.
+        """
+        embed_urls = []
+        for pattern in cls._EMBED_REGEX:
+            for _, embed_url in re.findall(pattern, webpage):
+                embed_urls.append(embed_url)
+        return embed_urls

    def _real_extract(self, url):
-        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
-        json_url = qs['json_url'][0]
+        qs = parse_qs(url)
+        json_url = qs['json_url'][-1]
        video_id = ArteTVIE._match_id(json_url)
        return self.url_result(
            json_url, ie=ArteTVIE.ie_key(), video_id=video_id)
@ -215,44 +279,45 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
-        'info_dict': {
-            'id': 'RC-016954',
-            'title': 'Earn a Living',
-            'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
-        },
-        'playlist_mincount': 6,
+        'only_matching': True,
    }, {
        'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
-        'only_matching': True,
+        'playlist_mincount': 100,
+        'info_dict': {
+            'description': 'md5:84e7bf1feda248bc325ebfac818c476e',
+            'id': 'RC-014123',
+            'title': 'ARTE Reportage - najlepsze reportaże',
+        },
+        'skip': '404 Not Found',
+    }, {
+        'url': 'https://www.arte.tv/en/videos/RC-016979/war-in-ukraine/',
+        'playlist_mincount': 79,
+        'info_dict': {
+            'id': 'RC-016979',
+            'title': 'War in Ukraine',
+            'description': 'On 24 February, Russian armed forces invaded Ukraine. We follow the war day by day and provide background information with special insights, reports and documentaries.',
+        },
    }]

    def _real_extract(self, url):
-        lang, playlist_id = re.match(self._VALID_URL, url).groups()
-        collection = self._download_json(
-            '%s/collectionData/%s/%s?source=videos'
-            % (self._API_BASE, lang, playlist_id), playlist_id)
-        entries = []
-        for video in collection['videos']:
-            if not isinstance(video, dict):
-                continue
-            video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl'))
-            if not video_url:
-                continue
-            video_id = video.get('programId')
-            entries.append({
+        lang, playlist_id = self._match_valid_url(url).group('lang', 'id')
+        playlist = self._download_json(
+            '{0}/playlist/{1}/{2}'.format(self._API_BASE, lang, playlist_id), playlist_id)['data']['attributes']
+
+        entries = [{
            '_type': 'url_transparent',
-                'url': video_url,
-                'id': video_id,
+            'url': video['config']['url'],
+            'ie_key': ArteTVIE.ie_key(),
+            'id': video.get('providerId'),
            'title': video.get('title'),
            'alt_title': video.get('subtitle'),
-                'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)),
-                'duration': int_or_none(video.get('durationSeconds')),
-                'view_count': int_or_none(video.get('views')),
-                'ie_key': ArteTVIE.ie_key(),
-            })
-        title = collection.get('title')
-        description = collection.get('shortDescription') or collection.get('teaserText')
-        return self.playlist_result(entries, playlist_id, title, description)
+            'thumbnail': url_or_none(traverse_obj(video, ('mainImage', 'url'))),
+            'duration': int_or_none(traverse_obj(video, ('duration', 'seconds'))),
+        } for video in traverse_obj(playlist, ('items', lambda _, v: v['config']['url']))]
+
+        return self.playlist_result(entries, playlist_id,
+                                    traverse_obj(playlist, ('metadata', 'title')),
+                                    traverse_obj(playlist, ('metadata', 'description')))


 class ArteTVCategoryIE(ArteTVBaseIE):
@ -262,11 +327,10 @@ class ArteTVCategoryIE(ArteTVBaseIE):
        'info_dict': {
            'id': 'politics-and-society',
            'title': 'Politics and society',
-            'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
+            'description': 'Watch documentaries and reportage about politics, society and current affairs.',
        },
        'playlist_mincount': 13,
-    },
-    ]
+    }]

    @classmethod
    def suitable(cls, url):
@ -275,27 +339,23 @@ class ArteTVCategoryIE(ArteTVBaseIE):
            and super(ArteTVCategoryIE, cls).suitable(url))

    def _real_extract(self, url):
-        lang, playlist_id = re.match(self._VALID_URL, url).groups()
+        lang, playlist_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, playlist_id)

        items = []
        for video in re.finditer(
-                r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
+                r'<a\b[^>]+\bhref\s*=\s*(?P<q>"|\'|\b)(?P<url>(?:https?://www\.arte\.tv)?/%s/videos/[\w/-]+)(?P=q)' % lang,
                webpage):
-            video = video.group('url')
+            video = urljoin(url, video.group('url'))
            if video == url:
                continue
            if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
                items.append(video)

-        if items:
        title = (self._og_search_title(webpage, default=None)
                 or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title>', default=None))
        title = strip_or_none(title.rsplit('|', 1)[0]) or self._generic_title(url)

-            result = self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title)
-            if result:
-                description = self._og_search_description(webpage, default=None)
-                if description:
-                    result['description'] = description
-                return result
+        return merge_dicts(
+            self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title),
+            {'description': self._og_search_description(webpage, default=None)})
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -3170,7 +3170,7 @@ class InfoExtractor(object):
                    # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
                    # of jwplayer.flash.swf
                    rtmp_url_parts = re.split(
-                        r'((?:mp4|mp3|flv):)', source_url, 1)
+                        r'((?:mp4|mp3|flv):)', source_url, maxsplit=1)
                    if len(rtmp_url_parts) == 3:
                        rtmp_url, prefix, play_path = rtmp_url_parts
                        a_format.update({
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -3,11 +3,13 @@
 from __future__ import unicode_literals

 import collections
+import hashlib
 import itertools
 import json
 import os.path
 import random
 import re
+import time
 import traceback

 from .common import InfoExtractor, SearchInfoExtractor
@ -290,6 +292,33 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

+    _SAPISID = None
+
+    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
+        """Build an `Authorization` header value from the SAPISID cookie.
+
+        The cookie value is looked up once and remembered in `self._SAPISID`
+        (`False` when no usable cookie was found).
+
+        @param origin   origin string that is hashed together with the
+                        timestamp and the cookie value
+        @returns        'SAPISIDHASH <timestamp>_<sha1 hex digest>', or None
+                        if no SAPISID value is available
+        """
+        # int(): in Python 2 round() returns a float, whose trailing '.0'
+        # would appear in both the hashed text and the header value
+        time_now = int(round(time.time()))
+        if self._SAPISID is None:
+            yt_cookies = self._get_cookies('https://www.youtube.com')
+            # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
+            # See: https://github.com/yt-dlp/yt-dlp/issues/393
+            sapisid_cookie = dict_get(
+                yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
+            if sapisid_cookie and sapisid_cookie.value:
+                self._SAPISID = sapisid_cookie.value
+                self.write_debug('Extracted SAPISID cookie')
+                # SAPISID cookie is required if not already present
+                if not yt_cookies.get('SAPISID'):
+                    self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
+                    self._set_cookie(
+                        '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
+            else:
+                # remember the failure so that the cookies aren't re-read
+                self._SAPISID = False
+        if not self._SAPISID:
+            return None
+        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
+        sapisidhash = hashlib.sha1(
+            '{0} {1} {2}'.format(time_now, self._SAPISID, origin).encode('utf-8')).hexdigest()
+        return 'SAPISIDHASH {0}_{1}'.format(time_now, sapisidhash)
+
    def _call_api(self, ep, query, video_id, fatal=True, headers=None):
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)
@ -1579,20 +1608,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        self.to_screen('Extracted signature function:\n' + code)

    def _parse_sig_js(self, jscode):
+        # Examples where `sig` is funcname:
+        # sig=function(a){a=a.split(""); ... ;return a.join("")};
+        # ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
+        # {var l=f,m=h.sp,n=sig(decodeURIComponent(h.s));l.set(m,encodeURIComponent(n))}
+        # sig=function(J){J=J.split(""); ... ;return J.join("")};
+        # ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J};
+        # {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))}
        funcname = self._search_regex(
-            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
-             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
-             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
-             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
+            (r'\b(?P<var>[\w$]+)&&\((?P=var)=(?P<sig>[\w$]{2,})\(decodeURIComponent\((?P=var)\)\)',
+             r'(?P<sig>[\w$]+)\s*=\s*function\(\s*(?P<arg>[\w$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)',
+             r'(?:\b|[^\w$])(?P<sig>[\w$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[\w$]{2}\.[\w$]{2}\(a,\d+\))?',
+             # Old patterns
+             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[\w$]+)\(',
+             r'\b[\w]+\s*&&\s*[\w]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[\w$]+)\(',
+             r'\bm=(?P<sig>[\w$]{2,})\(decodeURIComponent\(h\.s\)\)',
             # Obsolete patterns
-             r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
+             r'("|\')signature\1\s*,\s*(?P<sig>[\w$]+)\(',
+             r'\.sig\|\|(?P<sig>[\w$]+)\(',
+             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[\w$]+)\(',
+             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[\w$]+)\(',
+             r'\bc\s*&&\s*[\w]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[\w$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
@ -1658,36 +1694,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

    def _extract_n_function_name(self, jscode):
        func_name, idx = self._search_regex(
-            # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
-            # or:  (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
-            # or:  (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
+            # (y=NuD(),Mw(k),q=k.Z[y]||null)&&(q=narray[idx](q),k.set(y,q),k.V||NuD(''))}};
+            # (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
+            # or:  (b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
+            # or:  (b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c)
+            # or:  (PL(a),b=a.j.n||null)&&(b=narray[idx](b)
            # or:  (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
-            # old: (b=a.get("n"))&&(b=nfunc[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s*
+            # old: (b=a.get("n"))&&(b=narray[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s*
            # older: (b=a.get("n"))&&(b=nfunc(b)
            r'''(?x)
-                \((?:[\w$()\s]+,)*?\s*      # (
-                (?P<b>[a-z])\s*=\s*         # b=
-                (?:
-                    (?:                     # expect ,c=a.get(b) (etc)
-                        String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
-                        "n+"\[\s*\+?s*[\w$.]+\s*]
-                    )\s*(?:,[\w$()\s]+(?=,))*|
-                       (?P<old>[\w$]+)      # a (old[er])
-                   )\s*
-                   (?(old)
-                                            # b.get("n")
-                       (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
-                       (?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
-                       |                    # ,c=a.get(b)
-                       ,\s*(?P<c>[a-z])\s*=\s*[a-z]\s*
-                       (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
-                       (?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
-                   )
-                                            # interstitial junk
-                   \s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)?
-               (?(c)(?P=c)|(?P=b))\s*=\s*   # [c|b]=
-                                            # nfunc|nfunc[idx]
-                   (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
+                # (expr, ...,
+                \((?:(?:\s*[\w$]+\s*=)?(?:[\w$"+\.\s(\[]+(?:[)\]]\s*)?),)*
+                  # b=...
+                  (?P<b>[\w$]+)\s*=\s*(?!(?P=b)[^\w$])[\w$]+\s*(?:(?:
+                    \.\s*[\w$]+ |
+                    \[\s*[\w$]+\s*\] |
+                    \.\s*get\s*\(\s*[\w$"]+\s*\)
+                  )\s*){,2}(?:\s*\|\|\s*null(?=\s*\)))?\s*
+                \)\s*&&\s*\(        # ...)&&(
+                # b = nfunc, b = narray[idx]
+                (?P=b)\s*=\s*(?P<nfunc>[\w$]+)\s*
+                    (?:\[\s*(?P<idx>[\w$]+)\s*\]\s*)?
+                    # (...)
+                    \(\s*[\w$]+\s*\)
            ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
            default=(None, None))
        # thx bashonly: yt-dlp/yt-dlp/pull/10611
@ -1697,15 +1726,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                r'''(?xs)
                    (?:(?<=[^\w$])|^)       # instead of \b, which ignores $
                    (?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
-                    \s*\{(?:(?!};).)+?["']enhanced_except_
+                    \s*\{(?:(?!};).)+?(?:
+                        ["']enhanced_except_ |
+                        return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[\w$]+
+                    )
                ''', jscode, 'Initial JS player n function name', group='name')
        if not idx:
            return func_name

-        return self._parse_json(self._search_regex(
-            r'var\s+{0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode,
-            'Initial JS player n function list ({0}.{1})'.format(func_name, idx)),
-            func_name, transform_source=js_to_json)[int(idx)]
+        return self._search_json(
+            r'var\s+{0}\s*='.format(re.escape(func_name)), jscode,
+            'Initial JS player n function list ({0}.{1})'.format(func_name, idx),
+            func_name, contains_pattern=r'\[[\s\S]+\]', end_pattern='[,;]',
+            transform_source=js_to_json)[int(idx)]

    def _extract_n_function_code(self, video_id, player_url):
        player_id = self._extract_player_info(player_url)
@ -1728,13 +1761,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

        def extract_nsig(s):
            try:
-                ret = func([s])
+                ret = func([s], kwargs={'_ytdl_do_not_return': s})
            except JSInterpreter.Exception:
                raise
            except Exception as e:
                raise JSInterpreter.Exception(traceback.format_exc(), cause=e)

-            if ret.startswith('enhanced_except_'):
+            if ret.startswith('enhanced_except_') or ret.endswith(s):
                raise JSInterpreter.Exception('Signature function returned an exception')
            return ret

@ -1910,9 +1943,50 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            player_response = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')
-        if not player_response:
+        if False and not player_response:
            player_response = self._call_api(
                'player', {'videoId': video_id}, video_id)
+        if True or not player_response:
+            origin = 'https://www.youtube.com'
+            pb_context = {'html5Preference': 'HTML5_PREF_WANTS'}
+
+            player_url = self._extract_player_url(webpage)
+            ytcfg = self._extract_ytcfg(video_id, webpage)
+            sts = self._extract_signature_timestamp(video_id, player_url, ytcfg)
+            if sts:
+                pb_context['signatureTimestamp'] = sts
+
+            query = {
+                'playbackContext': {
+                    'contentPlaybackContext': pb_context,
+                    'contentCheckOk': True,
+                    'racyCheckOk': True,
+                },
+                'context': {
+                    'client': {
+                        'clientName': 'MWEB',
+                        'clientVersion': '2.20241202.07.00',
+                        'hl': 'en',
+                        'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
+                        'timeZone': 'UTC',
+                        'utcOffsetMinutes': 0,
+                    },
+                },
+                'videoId': video_id,
+            }
+            headers = {
+                'X-YouTube-Client-Name': '2',
+                'X-YouTube-Client-Version': '2.20241202.07.00',
+                'Origin': origin,
+                'Sec-Fetch-Mode': 'navigate',
+                'User-Agent': query['context']['client']['userAgent'],
+            }
+            auth = self._generate_sapisidhash_header(origin)
+            if auth is not None:
+                headers['Authorization'] = auth
+                headers['X-Origin'] = origin
+
+            player_response = self._call_api('player', query, video_id, fatal=False, headers=headers)

        def is_agegated(playability):
            if not isinstance(playability, dict):
@ -2219,12 +2293,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                        formats.append(f)

        playable_formats = [f for f in formats if not f.get('has_drm')]
-        if formats and not playable_formats:
+        if formats:
+            if not playable_formats:
                # If there are no formats that definitely don't have DRM, all have DRM
                self.report_drm(video_id)
            formats[:] = playable_formats
-
-        if not formats:
+        else:
            if streaming_data.get('licenseInfos'):
                raise ExtractorError(
                    'This video is DRM protected.', expected=True)
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals

 import itertools
@ -5,11 +6,12 @@ import json
 import operator
 import re

-from functools import update_wrapper
+from functools import update_wrapper, wraps

 from .utils import (
    error_to_compat_str,
    ExtractorError,
+    float_or_none,
    js_to_json,
    remove_quotes,
    unified_timestamp,
@ -20,9 +22,11 @@ from .compat import (
    compat_basestring,
    compat_chr,
    compat_collections_chain_map as ChainMap,
+    compat_contextlib_suppress,
    compat_filter as filter,
    compat_itertools_zip_longest as zip_longest,
    compat_map as map,
+    compat_numeric_types,
    compat_str,
 )

@ -62,6 +66,10 @@ _NaN = float('nan')
 _Infinity = float('inf')


+class JS_Undefined(object):
+    # Stands for the JS `undefined` value: the class object itself (never
+    # an instance) is what the operator helpers in this module test for
+    pass
+
+
 def _js_bit_op(op):

    def zeroise(x):
@ -74,43 +82,114 @@ def _js_bit_op(op):
    return wrapped


-def _js_arith_op(op):
+def _js_arith_op(op, div=False):

    @wraps_op(op)
    def wrapped(a, b):
        if JS_Undefined in (a, b):
            return _NaN
-        return op(a or 0, b or 0)
+        # null, "" --> 0
+        a, b = (float_or_none(
+            (x.strip() if isinstance(x, compat_basestring) else x) or 0,
+            default=_NaN) for x in (a, b))
+        if _NaN in (a, b):
+            return _NaN
+        try:
+            return op(a, b)
+        except ZeroDivisionError:
+            return _NaN if not (div and (a or b)) else _Infinity

    return wrapped


-def _js_div(a, b):
-    if JS_Undefined in (a, b) or not (a or b):
-        return _NaN
-    return operator.truediv(a or 0, b) if b else _Infinity
+_js_arith_add = _js_arith_op(operator.add)


-def _js_mod(a, b):
-    if JS_Undefined in (a, b) or not b:
-        return _NaN
-    return (a or 0) % b
+def _js_add(a, b):
+    """JS binary `+`: concatenate if either operand is a string, else add.
+
+    A non-string operand paired with a string is first converted with
+    `_js_toString()`.
+    """
+    a_is_str = isinstance(a, compat_basestring)
+    b_is_str = isinstance(b, compat_basestring)
+    if not (a_is_str or b_is_str):
+        return _js_arith_add(a, b)
+    # at least one operand is a string, so at most one needs converting
+    if not a_is_str:
+        a = _js_toString(a)
+    elif not b_is_str:
+        b = _js_toString(b)
+    return a + b
+
+
+_js_mod = _js_arith_op(operator.mod)
+__js_exp = _js_arith_op(operator.pow)


 def _js_exp(a, b):
    if not b:
        return 1  # even 0 ** 0 !!
-    elif JS_Undefined in (a, b):
-        return _NaN
-    return (a or 0) ** b
+    return __js_exp(a, b)


-def _js_eq_op(op):
+def _js_to_primitive(v):
+    """Reduce `v` to a string or number for comparison/concatenation.
+
+    Lists become their comma-joined element strings, dicts become
+    '[object Object]', strings and numbers are returned unchanged and
+    anything else is converted with `compat_str()`.
+    """
+    if isinstance(v, list):
+        return ','.join(map(_js_toString, v))
+    if isinstance(v, dict):
+        return '[object Object]'
+    if isinstance(v, (compat_numeric_types, compat_basestring)):
+        return v
+    return compat_str(v)
+
+
+def _js_toString(v):
+    """Return the string that JS string conversion would give for `v`.
+
+    Handles undefined, +/-Infinity, NaN, null, booleans and numbers here;
+    any other value is passed to `_js_to_primitive()`.
+    """
+    if v is JS_Undefined:
+        return 'undefined'
+    if v == _Infinity:
+        return 'Infinity'
+    if v == -_Infinity:
+        return '-Infinity'
+    if v is _NaN:
+        return 'NaN'
+    if v is None:
+        return 'null'
+    # bool <= int: do this first
+    if isinstance(v, bool):
+        return ('false', 'true')[v]
+    if isinstance(v, compat_numeric_types):
+        # Drop trailing zeros of the fraction, then a dangling point.
+        # NB not rstrip('.0'): that strips a *set* of characters, so
+        # that 10 would become '1' and 0 would become ''
+        return '{0:.7f}'.format(v).rstrip('0').rstrip('.')
+    return _js_to_primitive(v)
+
+
+_nullish = frozenset((None, JS_Undefined))
+
+
+def _js_eq(a, b):
+    """Implement the JS loose equality operator `==`.
+
+    NaN equals nothing; lists and dicts of the same type compare by
+    identity; null and undefined equal each other but nothing else; for
+    the rest, operands are reduced with `_js_to_primitive()` and a string
+    is converted to a number before the final comparison.
+    """
+    # NaN != any
+    if _NaN in (a, b):
+        return False
+    # Object is Object
+    if isinstance(a, type(b)) and isinstance(b, (dict, list)):
+        return operator.is_(a, b)
+    # general case
+    if a == b:
+        return True
+    # null == undefined, and neither equals anything else
+    # identity tests rather than a set: a list/dict operand is unhashable
+    # and `[] == null` must give false, not raise TypeError
+    a_nullish, b_nullish = (x is None or x is JS_Undefined for x in (a, b))
+    if a_nullish or b_nullish:
+        return a_nullish and b_nullish
+    a, b = _js_to_primitive(a), _js_to_primitive(b)
+    if not isinstance(a, compat_basestring):
+        a, b = b, a
+    # Number to String: convert the string to a number
+    # Conversion failure results in ... false
+    if isinstance(a, compat_basestring):
+        return float_or_none(a) == b
+    return a == b
+
+
+def _js_neq(a, b):
+    """Implement JS `!=` as the negation of `_js_eq()`."""
+    loosely_equal = _js_eq(a, b)
+    return not loosely_equal
+
+
+def _js_id_op(op):

    @wraps_op(op)
    def wrapped(a, b):
-        if set((a, b)) <= set((None, JS_Undefined)):
-            return op(a, a)
+        if _NaN in (a, b):
+            return op(_NaN, None)
+        if not isinstance(a, (compat_basestring, compat_numeric_types)):
+            a, b = b, a
+        # strings are === if ==
+        # why 'a' is not 'a': https://stackoverflow.com/a/1504848
+        if isinstance(a, (compat_basestring, compat_numeric_types)):
+            return a == b if op(0, 0) else a != b
        return op(a, b)

    return wrapped
@ -138,25 +217,57 @@ def _js_ternary(cndn, if_true=True, if_false=False):
    return if_true


+def _js_unary_op(op):
+    # Adapt the one-argument callable `op` to a two-argument signature:
+    # the returned function ignores its first argument and applies `op`
+    # to the second
+
+    @wraps_op(op)
+    def wrapped(_, a):
+        return op(a)
+
+    return wrapped
+
+
+# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/typeof
+def _js_typeof(expr):
+    """Return the JS `typeof` string for the interpreted value `expr`.
+
+    One of 'undefined', 'boolean', 'number', 'string', 'function' or
+    'object' (which also covers null).
+    """
+    # JS_Undefined is a class, hence callable: test it before callable()
+    if expr is JS_Undefined:
+        return 'undefined'
+    if expr is None:
+        return 'object'
+    # bool before number, as bool is an int subclass; also, isinstance()
+    # rather than a dict keyed on True/False, which would match 1 and 0
+    # too (equal value and hash) and so report numbers as 'boolean'
+    if isinstance(expr, bool):
+        return 'boolean'
+    if expr is _NaN or expr == _Infinity:
+        return 'number'
+    for t, n in (
+        (compat_basestring, 'string'),
+        (compat_numeric_types, 'number'),
+    ):
+        if isinstance(expr, t):
+            return n
+    if callable(expr):
+        return 'function'
+    # TODO: Symbol, BigInt
+    return 'object'
+
+
 # (op, definition) in order of binding priority, tightest first
 # avoid dict to maintain order
 # definition None => Defined in JSInterpreter._operator
 _OPERATORS = (
    ('>>', _js_bit_op(operator.rshift)),
    ('<<', _js_bit_op(operator.lshift)),
-    ('+', _js_arith_op(operator.add)),
+    ('+', _js_add),
    ('-', _js_arith_op(operator.sub)),
    ('*', _js_arith_op(operator.mul)),
    ('%', _js_mod),
-    ('/', _js_div),
+    ('/', _js_arith_op(operator.truediv, div=True)),
    ('**', _js_exp),
 )

 _COMP_OPERATORS = (
-    ('===', operator.is_),
-    ('!==', operator.is_not),
-    ('==', _js_eq_op(operator.eq)),
-    ('!=', _js_eq_op(operator.ne)),
+    ('===', _js_id_op(operator.is_)),
+    ('!==', _js_id_op(operator.is_not)),
+    ('==', _js_eq),
+    ('!=', _js_neq),
    ('<=', _js_comp_op(operator.le)),
    ('>=', _js_comp_op(operator.ge)),
    ('<', _js_comp_op(operator.lt)),
@ -176,6 +287,11 @@ _SC_OPERATORS = (
    ('&&', None),
 )

+# (keyword, implementation) pairs for the keyword unary operators; each
+# implementation is built with _js_unary_op(), so it takes two arguments
+# and ignores the first
+_UNARY_OPERATORS_X = (
+    ('void', _js_unary_op(lambda _: JS_Undefined)),
+    ('typeof', _js_unary_op(_js_typeof)),
+)
+
 _OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS))

 _NAME_RE = r'[a-zA-Z_$][\w$]*'
@ -183,10 +299,6 @@ _MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
 _QUOTES = '\'"/'


-class JS_Undefined(object):
-    pass
-
-
 class JS_Break(ExtractorError):
    def __init__(self):
        ExtractorError.__init__(self, 'Invalid break')
@ -242,6 +354,7 @@ class Debugger(object):

    @classmethod
    def wrap_interpreter(cls, f):
+        @wraps(f)
        def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
            if cls.ENABLED and stmt.strip():
                cls.write(stmt, level=allow_recursion)
@ -255,7 +368,7 @@ class Debugger(object):
                raise
            if cls.ENABLED and stmt.strip():
                if should_ret or repr(ret) != stmt:
-                    cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
+                    cls.write(['->', '=>'][bool(should_ret)], repr(ret), '<-|', stmt, level=allow_recursion)
            return ret, should_ret
        return interpret_statement

@ -284,6 +397,9 @@ class JSInterpreter(object):
        RE_FLAGS = {
            # special knowledge: Python's re flags are bitmask values, current max 128
            # invent new bitmask values well above that for literal parsing
+            # JS 'u' flag is effectively always set (surrogate pairs aren't seen),
+            # but \u{...} and \p{...} escapes aren't handled; no additional JS 'v'
+            # features are supported
            # TODO: execute matches with these flags (remaining: d, y)
            'd': 1024,  # Generate indices for substring matches
            'g': 2048,  # Global search
@ -291,6 +407,7 @@ class JSInterpreter(object):
            'm': re.M,  # Multi-line search
            's': re.S,  # Allows . to match newline characters
            'u': re.U,  # Treat a pattern as a sequence of unicode code points
+            'v': re.U,  # Like 'u' with extended character class and \p{} syntax
            'y': 4096,  # Perform a "sticky" search that matches starting at the current position in the target string
        }

@ -347,6 +464,8 @@ class JSInterpreter(object):
    def __op_chars(cls):
        op_chars = set(';,[')
        for op in cls._all_operators():
+            if op[0].isalpha():
+                continue
            op_chars.update(op[0])
        return op_chars

@ -369,9 +488,18 @@ class JSInterpreter(object):
        skipping = 0
        if skip_delims:
            skip_delims = variadic(skip_delims)
+        skip_txt = None
        for idx, char in enumerate(expr):
+            if skip_txt and idx <= skip_txt[1]:
+                continue
            paren_delta = 0
            if not in_quote:
+                if char == '/' and expr[idx:idx + 2] == '/*':
+                    # skip a comment
+                    skip_txt = expr[idx:].find('*/', 2)
+                    skip_txt = [idx, idx + skip_txt + 1] if skip_txt >= 2 else None
+                    if skip_txt:
+                        continue
                if char in _MATCHING_PARENS:
                    counters[_MATCHING_PARENS[char]] += 1
                    paren_delta = 1
@ -404,11 +532,18 @@ class JSInterpreter(object):
            if pos < delim_len:
                pos += 1
                continue
+            if skip_txt and skip_txt[0] >= start and skip_txt[1] <= idx - delim_len:
+                yield expr[start:skip_txt[0]] + expr[skip_txt[1] + 1: idx - delim_len]
+            else:
                yield expr[start: idx - delim_len]
+            skip_txt = None
            start, pos = idx + 1, 0
            splits += 1
            if max_split and splits >= max_split:
                break
+        if skip_txt and skip_txt[0] >= start:
+            yield expr[start:skip_txt[0]] + expr[skip_txt[1] + 1:]
+        else:
            yield expr[start:]

    @classmethod
@ -425,7 +560,7 @@ class JSInterpreter(object):
        if not _cached:
            _cached.extend(itertools.chain(
                # Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
-                _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS))
+                _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS, _UNARY_OPERATORS_X))
        return _cached

    def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
@ -449,13 +584,14 @@ class JSInterpreter(object):
        except Exception as e:
            raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)

-    def _index(self, obj, idx, allow_undefined=False):
-        if idx == 'length':
+    def _index(self, obj, idx, allow_undefined=True):
+        if idx == 'length' and isinstance(obj, list):
            return len(obj)
        try:
-            return obj[int(idx)] if isinstance(obj, list) else obj[idx]
-        except Exception as e:
+            return obj[int(idx)] if isinstance(obj, list) else obj[compat_str(idx)]
+        except (TypeError, KeyError, IndexError) as e:
            if allow_undefined:
+                # when is this not allowed?
                return JS_Undefined
            raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)

@ -467,7 +603,7 @@ class JSInterpreter(object):

    # used below
    _VAR_RET_THROW_RE = re.compile(r'''(?x)
-        (?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["'])|$)|(?P<throw>throw\s+)
+        (?:(?P<var>var|const|let)\s+|(?P<ret>return)(?:\s+|(?=["'])|$)|(?P<throw>throw)\s+)
        ''')
    _COMPOUND_RE = re.compile(r'''(?x)
        (?P<try>try)\s*\{|
@ -479,6 +615,52 @@ class JSInterpreter(object):
    _FINALLY_RE = re.compile(r'finally\s*\{')
    _SWITCH_RE = re.compile(r'switch\s*\(')

+    def handle_operators(self, expr, local_vars, allow_recursion):
+
+        for op, _ in self._all_operators():
+            # hackety: </> have higher priority than <</>>, but don't confuse them
+            skip_delim = (op + op) if op in '<>*?' else None
+            if op == '?':
+                skip_delim = (skip_delim, '?.')
+            separated = list(self._separate(expr, op, skip_delims=skip_delim))
+            if len(separated) < 2:
+                continue
+
+            right_expr = separated.pop()
+            # handle operators that are both unary and binary, minimal BODMAS
+            if op in ('+', '-'):
+                # simplify/adjust consecutive instances of these operators
+                undone = 0
+                separated = [s.strip() for s in separated]
+                while len(separated) > 1 and not separated[-1]:
+                    undone += 1
+                    separated.pop()
+                if op == '-' and undone % 2 != 0:
+                    right_expr = op + right_expr
+                elif op == '+':
+                    while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS:
+                        right_expr = separated.pop() + right_expr
+                    if separated[-1][-1:] in self.OP_CHARS:
+                        right_expr = separated.pop() + right_expr
+                # hanging op at end of left => unary + (strip) or - (push right)
+                left_val = separated[-1] if separated else ''
+                for dm_op in ('*', '%', '/', '**'):
+                    bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
+                    if len(bodmas) > 1 and not bodmas[-1].strip():
+                        expr = op.join(separated) + op + right_expr
+                        if len(separated) > 1:
+                            separated.pop()
+                            right_expr = op.join((left_val, right_expr))
+                        else:
+                            separated = [op.join((left_val, right_expr))]
+                            right_expr = None
+                        break
+                if right_expr is None:
+                    continue
+
+            left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
+            return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), True
+
    @Debugger.wrap_interpreter
    def interpret_statement(self, stmt, local_vars, allow_recursion=100):
        if allow_recursion < 0:
@ -501,7 +683,7 @@ class JSInterpreter(object):
            expr = stmt[len(m.group(0)):].strip()
            if m.group('throw'):
                raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion))
-            should_return = not m.group('var')
+            should_return = 'return' if m.group('ret') else False
        if not expr:
            return None, should_return

@ -533,9 +715,15 @@ class JSInterpreter(object):
            else:
                raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr)

-        if expr.startswith('void '):
-            left = self.interpret_expression(expr[5:], local_vars, allow_recursion)
-            return None, should_return
+        for op, _ in _UNARY_OPERATORS_X:
+            if not expr.startswith(op):
+                continue
+            operand = expr[len(op):]
+            if not operand or operand[0] != ' ':
+                continue
+            op_result = self.handle_operators(expr, local_vars, allow_recursion)
+            if op_result:
+                return op_result[0], should_return

        if expr.startswith('{'):
            inner, outer = self._separate_at_paren(expr)
@ -582,7 +770,7 @@ class JSInterpreter(object):
                if_expr, expr = self._separate_at_paren(expr)
            else:
                # may lose ... else ... because of ll.368-374
-                if_expr, expr = self._separate_at_paren(expr, delim=';')
+                if_expr, expr = self._separate_at_paren(' %s;' % (expr,), delim=';')
            else_expr = None
            m = re.match(r'else\s*(?P<block>\{)?', expr)
            if m:
@ -720,7 +908,7 @@ class JSInterpreter(object):
            start, end = m.span()
            sign = m.group('pre_sign') or m.group('post_sign')
            ret = local_vars[var]
-            local_vars[var] += 1 if sign[0] == '+' else -1
+            local_vars[var] = _js_add(ret, 1 if sign[0] == '+' else -1)
            if m.group('pre_sign'):
                ret = local_vars[var]
            expr = expr[:start] + self._dump(ret, local_vars) + expr[end:]
@ -730,13 +918,13 @@ class JSInterpreter(object):

        m = re.match(r'''(?x)
            (?P<assign>
-                (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?\s*
+                (?P<out>{_NAME_RE})(?:\[(?P<out_idx>(?:.+?\]\s*\[)*.+?)\])?\s*
                (?P<op>{_OPERATOR_RE})?
                =(?!=)(?P<expr>.*)$
            )|(?P<return>
                (?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
            )|(?P<indexing>
-                (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
+                (?P<in>{_NAME_RE})\[(?P<in_idx>(?:.+?\]\s*\[)*.+?)\]$
            )|(?P<attribute>
                (?P<var>{_NAME_RE})(?:(?P<nullish>\?)?\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
            )|(?P<function>
@ -746,19 +934,23 @@ class JSInterpreter(object):
        if md.get('assign'):
            left_val = local_vars.get(m.group('out'))

-            if not m.group('index'):
+            if not m.group('out_idx'):
                local_vars[m.group('out')] = self._operator(
                    m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
                return local_vars[m.group('out')], should_return
            elif left_val in (None, JS_Undefined):
                raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr)

-            idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
-            if not isinstance(idx, (int, float)):
-                raise self.Exception('List index %s must be integer' % (idx, ), expr=expr)
+            indexes = re.split(r'\]\s*\[', m.group('out_idx'))
+            for i, idx in enumerate(indexes, 1):
+                idx = self.interpret_expression(idx, local_vars, allow_recursion)
+                if i < len(indexes):
+                    left_val = self._index(left_val, idx)
+            if isinstance(idx, float):
                idx = int(idx)
            left_val[idx] = self._operator(
-                m.group('op'), self._index(left_val, idx), m.group('expr'), expr, local_vars, allow_recursion)
+                m.group('op'), self._index(left_val, idx) if m.group('op') else None,
+                m.group('expr'), expr, local_vars, allow_recursion)
            return left_val[idx], should_return

        elif expr.isdigit():
@ -776,63 +968,31 @@ class JSInterpreter(object):
            return _Infinity, should_return

        elif md.get('return'):
-            return local_vars[m.group('name')], should_return
+            ret = local_vars[m.group('name')]
+            # challenge may try to force returning the original value
+            # use an optional internal var to block this
+            if should_return == 'return':
+                if '_ytdl_do_not_return' not in local_vars:
+                    return ret, True
+                return (ret, True) if ret != local_vars['_ytdl_do_not_return'] else (ret, False)
+            else:
+                return ret, should_return

-        try:
+        with compat_contextlib_suppress(ValueError):
            ret = json.loads(js_to_json(expr))  # strict=True)
            if not md.get('attribute'):
                return ret, should_return
-        except ValueError:
-            pass

        if md.get('indexing'):
            val = local_vars[m.group('in')]
-            idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion)
-            return self._index(val, idx), should_return
+            for idx in re.split(r'\]\s*\[', m.group('in_idx')):
+                idx = self.interpret_expression(idx, local_vars, allow_recursion)
+                val = self._index(val, idx)
+            return val, should_return

-        for op, _ in self._all_operators():
-            # hackety: </> have higher priority than <</>>, but don't confuse them
-            skip_delim = (op + op) if op in '<>*?' else None
-            if op == '?':
-                skip_delim = (skip_delim, '?.')
-            separated = list(self._separate(expr, op, skip_delims=skip_delim))
-            if len(separated) < 2:
-                continue
-
-            right_expr = separated.pop()
-            # handle operators that are both unary and binary, minimal BODMAS
-            if op in ('+', '-'):
-                # simplify/adjust consecutive instances of these operators
-                undone = 0
-                separated = [s.strip() for s in separated]
-                while len(separated) > 1 and not separated[-1]:
-                    undone += 1
-                    separated.pop()
-                if op == '-' and undone % 2 != 0:
-                    right_expr = op + right_expr
-                elif op == '+':
-                    while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS:
-                        right_expr = separated.pop() + right_expr
-                    if separated[-1][-1:] in self.OP_CHARS:
-                        right_expr = separated.pop() + right_expr
-                # hanging op at end of left => unary + (strip) or - (push right)
-                left_val = separated[-1] if separated else ''
-                for dm_op in ('*', '%', '/', '**'):
-                    bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
-                    if len(bodmas) > 1 and not bodmas[-1].strip():
-                        expr = op.join(separated) + op + right_expr
-                        if len(separated) > 1:
-                            separated.pop()
-                            right_expr = op.join((left_val, right_expr))
-                        else:
-                            separated = [op.join((left_val, right_expr))]
-                            right_expr = None
-                        break
-                if right_expr is None:
-                    continue
-
-            left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
-            return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return
+        op_result = self.handle_operators(expr, local_vars, allow_recursion)
+        if op_result:
+            return op_result[0], should_return

        if md.get('attribute'):
            variable, member, nullish = m.group('var', 'member', 'nullish')
@ -877,7 +1037,7 @@ class JSInterpreter(object):

                # Member access
                if arg_str is None:
-                    return self._index(obj, member, nullish)
+                    return self._index(obj, member)

                # Function call
                argvals = [
@ -904,7 +1064,7 @@ class JSInterpreter(object):
                if obj is compat_str:
                    if member == 'fromCharCode':
                        assertion(argvals, 'takes one or more arguments')
-                        return ''.join(map(compat_chr, argvals))
+                        return ''.join(compat_chr(int(n)) for n in argvals)
                    raise self.Exception('Unsupported string method ' + member, expr=expr)
                elif obj is float:
                    if member == 'pow':
@ -913,13 +1073,47 @@ class JSInterpreter(object):
                    raise self.Exception('Unsupported Math method ' + member, expr=expr)

                if member == 'split':
-                    assertion(argvals, 'takes one or more arguments')
-                    assertion(len(argvals) == 1, 'with limit argument is not implemented')
-                    return obj.split(argvals[0]) if argvals[0] else list(obj)
+                    assertion(len(argvals) <= 2, 'takes at most two arguments')
+                    if len(argvals) > 1:
+                        limit = argvals[1]
+                        assertion(isinstance(limit, int) and limit >= 0, 'integer limit >= 0')
+                        if limit == 0:
+                            return []
+                    else:
+                        limit = 0
+                    if len(argvals) == 0:
+                        argvals = [JS_Undefined]
+                    elif isinstance(argvals[0], self.JS_RegExp):
+                        # avoid re.split(), similar but not enough
+
+                        def where():
+                            for m in argvals[0].finditer(obj):
+                                yield m.span(0)
+                            yield (None, None)
+
+                        def splits(limit=limit):
+                            i = 0
+                            for j, jj in where():
+                                if j == jj == 0:
+                                    continue
+                                if j is None and i >= len(obj):
+                                    break
+                                yield obj[i:j]
+                                if jj is None or limit == 1:
+                                    break
+                                limit -= 1
+                                i = jj
+
+                        return list(splits())
+                    return (
+                        obj.split(argvals[0], limit - 1) if argvals[0] and argvals[0] != JS_Undefined
+                        else list(obj)[:limit or None])
                elif member == 'join':
                    assertion(isinstance(obj, list), 'must be applied on a list')
-                    assertion(len(argvals) == 1, 'takes exactly one argument')
-                    return argvals[0].join(obj)
+                    assertion(len(argvals) <= 1, 'takes at most one argument')
+                    return (',' if len(argvals) == 0 else argvals[0]).join(
+                        ('' if x in (None, JS_Undefined) else _js_toString(x))
+                        for x in obj)
                elif member == 'reverse':
                    assertion(not argvals, 'does not take any arguments')
                    obj.reverse()
@ -941,37 +1135,31 @@ class JSInterpreter(object):
                    index, how_many = map(int, (argvals + [len(obj)])[:2])
                    if index < 0:
                        index += len(obj)
-                    add_items = argvals[2:]
-                    res = []
-                    for _ in range(index, min(index + how_many, len(obj))):
-                        res.append(obj.pop(index))
-                    for i, item in enumerate(add_items):
-                        obj.insert(index + i, item)
+                    res = [obj.pop(index)
+                           for _ in range(index, min(index + how_many, len(obj)))]
+                    obj[index:index] = argvals[2:]
                    return res
-                elif member == 'unshift':
-                    assertion(isinstance(obj, list), 'must be applied on a list')
-                    assertion(argvals, 'takes one or more arguments')
-                    for item in reversed(argvals):
-                        obj.insert(0, item)
-                    return obj
-                elif member == 'pop':
+                elif member in ('shift', 'pop'):
                    assertion(isinstance(obj, list), 'must be applied on a list')
                    assertion(not argvals, 'does not take any arguments')
-                    if not obj:
-                        return
-                    return obj.pop()
+                    return obj.pop(0 if member == 'shift' else -1) if len(obj) > 0 else JS_Undefined
+                elif member == 'unshift':
+                    assertion(isinstance(obj, list), 'must be applied on a list')
+                    # not enforced: assertion(argvals, 'takes one or more arguments')
+                    obj[0:0] = argvals
+                    return len(obj)
                elif member == 'push':
-                    assertion(argvals, 'takes one or more arguments')
+                    # not enforced: assertion(argvals, 'takes one or more arguments')
                    obj.extend(argvals)
-                    return obj
+                    return len(obj)
                elif member == 'forEach':
                    assertion(argvals, 'takes one or more arguments')
-                    assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
+                    assertion(len(argvals) <= 2, 'takes at most 2 arguments')
                    f, this = (argvals + [''])[:2]
                    return [f((item, idx, obj), {'this': this}, allow_recursion) for idx, item in enumerate(obj)]
                elif member == 'indexOf':
                    assertion(argvals, 'takes one or more arguments')
-                    assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
+                    assertion(len(argvals) <= 2, 'takes at most 2 arguments')
                    idx, start = (argvals + [0])[:2]
                    try:
                        return obj.index(idx, start)
@ -980,7 +1168,7 @@ class JSInterpreter(object):
                elif member == 'charCodeAt':
                    assertion(isinstance(obj, compat_str), 'must be applied on a string')
                    # assertion(len(argvals) == 1, 'takes exactly one argument') # but not enforced
-                    idx = argvals[0] if isinstance(argvals[0], int) else 0
+                    idx = argvals[0] if len(argvals) > 0 and isinstance(argvals[0], int) else 0
                    if idx >= len(obj):
                        return None
                    return ord(obj[idx])
@ -1031,7 +1219,7 @@ class JSInterpreter(object):
            yield self.interpret_expression(v, local_vars, allow_recursion)

    def extract_object(self, objname):
-        _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
+        _FUNC_NAME_RE = r'''(?:{n}|"{n}"|'{n}')'''.format(n=_NAME_RE)
        obj = {}
        fields = next(filter(None, (
            obj_m.group('fields') for obj_m in re.finditer(
@ -1090,6 +1278,7 @@ class JSInterpreter(object):

    def extract_function_from_code(self, argnames, code, *global_stack):
        local_vars = {}
+
        while True:
            mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code)
            if mobj is None:
@ -1100,10 +1289,11 @@ class JSInterpreter(object):
                [x.strip() for x in mobj.group('args').split(',')],
                body, local_vars, *global_stack))
            code = code[:start] + name + remaining
+
        return self.build_function(argnames, code, local_vars, *global_stack)

-    def call_function(self, funcname, *args):
-        return self.extract_function(funcname)(args)
+    def call_function(self, funcname, *args, **kw_global_vars):
+        return self.extract_function(funcname)(args, kw_global_vars)

    @classmethod
    def build_arglist(cls, arg_text):
@ -1122,8 +1312,9 @@ class JSInterpreter(object):
        global_stack = list(global_stack) or [{}]
        argnames = tuple(argnames)

-        def resf(args, kwargs={}, allow_recursion=100):
-            global_stack[0].update(zip_longest(argnames, args, fillvalue=None))
+        def resf(args, kwargs=None, allow_recursion=100):
+            kwargs = kwargs or {}
+            global_stack[0].update(zip_longest(argnames, args, fillvalue=JS_Undefined))
            global_stack[0].update(kwargs)
            var_stack = LocalNameSpace(*global_stack)
            ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
Author	SHA1	Message	Date
Kaspar V.	a097a5a865	Merge `a4759ae8ea` into `d55d1f423d`	2024-12-21 10:50:30 +01:00
dirkf	d55d1f423d	[YouTube] Always extract using MWEB API client * temporary fix-up for 403 on download * MWEB parameters from yt-dlp 2024-12-06	2024-12-16 12:38:51 +00:00
dirkf	eeafbbc3e5	[YouTube] Fix signature function extraction for `2f1832d2` * `_` was omitted from patterns * thx yt-dlp/yt-dlp#11801 Co-authored-by: bashonly	2024-12-16 12:38:51 +00:00
dirkf	cd7c7b5edb	[YouTube] Simplify pattern for nsig function name extraction	2024-12-16 12:38:51 +00:00
dirkf	eed784e15f	[YouTube] Pass nsig value as return hook, fixes player `3bb1f723`	2024-12-16 12:38:51 +00:00
dirkf	b4469a0f65	[YouTube] Handle player `3bb1f723` * fix signature code extraction * raise if n function returns input value * add new tests from yt-dlp Co-authored-by: bashonly	2024-12-16 12:38:51 +00:00
dirkf	ce1e556b8f	[jsinterp] Add return hook for player `3bb1f723` * set var `_ytdl_do_not_return` to a specific value in the scope of a function * if an expression to be returned has that value, `return` becomes `void`	2024-12-16 12:38:51 +00:00
dirkf	f487b4a02a	[jsinterp] Strip /* comments */ when parsing NB: _separate() is looking creaky	2024-12-16 12:38:51 +00:00
dirkf	60835ca16c	[jsinterp] Fix and improve "methods" * push, unshift return new length * improve edge cases for push/pop, shift/unshift, forEach, indexOf, charCodeAt * increase test coverage	2024-12-16 12:38:51 +00:00
dirkf	94fd774608	[jsinterp] Fix and improve split/join * improve split/join edge cases * correctly implement regex split (not like re.split)	2024-12-16 12:38:51 +00:00
dirkf	5dee6213ed	[jsinterp] Fix and improve arithmetic operations * addition becomes concat with a string operand * improve handling of edgier cases * arithmetic in float like JS (more places need cast to int?) * increase test coverage	2024-12-16 12:38:51 +00:00
dirkf	81e64cacf2	[jsinterp] Support multiple indexing (eg a[1][2]) * extend single indexing with improved RE (should probably use/have used _separate_at_paren()) * fix some cases that should have given undefined, not throwing * standardise RE group names * support length of objects, like {1: 2, 3: 4, length: 42}	2024-12-16 12:38:51 +00:00
dirkf	c1a03b1ac3	[jsinterp] Fix and improve loose and strict equality operations * reimplement loose equality according to MDN (eg, 1 == "1") * improve strict equality (eg, "abc" === "abc" but 'abc' is not 'abc') * add tests for above	2024-12-16 12:38:51 +00:00
dirkf	118c6d7a17	[jsinterp] Implement `typeof` operator	2024-12-16 12:38:51 +00:00
dirkf	f28d7178e4	[InfoExtractor] Use kwarg maxsplit for re.split * May become kw-only in future Pythons	2024-12-16 12:38:51 +00:00
dirkf	a4759ae8ea	Fix ArteTVEmbedIE	2023-02-15 16:37:05 +00:00
dirkf	3bb4530d42	`langauge_code`	2023-02-15 00:29:39 +00:00
dirkf	b3b0520c82	Update Based on yt-dlp/yt-dlp 051d6b4 merging [extractor/arte] yt-dlp/yt-dlp#3302: Move to v2 API Authored by: fstirlitz, pukkandan	2023-02-15 00:24:38 +00:00
dirkf	cdcb4af1c3	Linted?	2023-02-14 18:29:26 +00:00
dirkf	82833b44d7	Merge branch 'master' into extractor/arte/improvement-switch-to-config-api-v2	2023-02-14 18:24:27 +00:00
dirkf	0c149f04d4	Update from yt-dlp	2023-02-14 18:18:47 +00:00
Kaspar Vollenweider	66eec4e640	WIP: arte config api v2	2021-07-26 07:56:29 +02:00