mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2026-05-25 06:45:18 +00:00
Merge branch 'ytdl-org:master' into version-gnu-V
This commit is contained in:
@@ -1636,7 +1636,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
try:
|
||||
jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
|
||||
except ExtractorError as e:
|
||||
raise ExtractorError('Unable to extract nsig jsi, player_id, func_codefunction code', cause=e)
|
||||
raise ExtractorError('Unable to extract nsig function code', cause=e)
|
||||
if self.get_param('youtube_print_sig_code'):
|
||||
self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format(
|
||||
player_id, func_code[1]))
|
||||
@@ -1658,8 +1658,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def _extract_n_function_name(self, jscode):
|
||||
func_name, idx = self._search_regex(
|
||||
r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?\([\w$]+\)',
|
||||
jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
|
||||
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
|
||||
# old: .get("n"))&&(b=nfunc[idx](b)
|
||||
# older: .get("n"))&&(b=nfunc(b)
|
||||
r'''(?x)
|
||||
(?:\(\s*(?P<b>[a-z])\s*=\s*String\s*\.\s*fromCharCode\s*\(\s*110\s*\)\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)?
|
||||
\.\s*get\s*\(\s*(?(b)(?P=b)|"n")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s*
|
||||
(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
|
||||
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
|
||||
if not idx:
|
||||
return func_name
|
||||
|
||||
@@ -1679,17 +1685,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
func_name = self._extract_n_function_name(jscode)
|
||||
|
||||
# For redundancy
|
||||
func_code = self._search_regex(
|
||||
r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
|
||||
# NB: The end of the regex is intentionally kept strict
|
||||
{(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name,
|
||||
jscode, 'nsig function', group=('var', 'code'), default=None)
|
||||
if func_code:
|
||||
func_code = ([func_code[0]], func_code[1])
|
||||
else:
|
||||
self.write_debug('Extracting nsig function with jsinterp')
|
||||
func_code = jsi.extract_function_code(func_name)
|
||||
func_code = jsi.extract_function_code(func_name)
|
||||
|
||||
self.cache.store('youtube-nsig', player_id, func_code)
|
||||
return jsi, player_id, func_code
|
||||
|
||||
+53
-33
@@ -20,7 +20,9 @@ from .compat import (
|
||||
compat_basestring,
|
||||
compat_chr,
|
||||
compat_collections_chain_map as ChainMap,
|
||||
compat_filter as filter,
|
||||
compat_itertools_zip_longest as zip_longest,
|
||||
compat_map as map,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
@@ -252,7 +254,7 @@ class Debugger(object):
|
||||
cls.write('=> Raises:', e, '<-|', stmt, level=allow_recursion)
|
||||
raise
|
||||
if cls.ENABLED and stmt.strip():
|
||||
if should_ret or not repr(ret) == stmt:
|
||||
if should_ret or repr(ret) != stmt:
|
||||
cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
|
||||
return ret, should_ret
|
||||
return interpret_statement
|
||||
@@ -365,6 +367,8 @@ class JSInterpreter(object):
|
||||
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
|
||||
in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
|
||||
skipping = 0
|
||||
if skip_delims:
|
||||
skip_delims = variadic(skip_delims)
|
||||
for idx, char in enumerate(expr):
|
||||
paren_delta = 0
|
||||
if not in_quote:
|
||||
@@ -391,7 +395,7 @@ class JSInterpreter(object):
|
||||
continue
|
||||
elif pos == 0 and skip_delims:
|
||||
here = expr[idx:]
|
||||
for s in variadic(skip_delims):
|
||||
for s in skip_delims:
|
||||
if here.startswith(s) and s:
|
||||
skipping = len(s) - 1
|
||||
break
|
||||
@@ -412,7 +416,6 @@ class JSInterpreter(object):
|
||||
if delim is None:
|
||||
delim = expr and _MATCHING_PARENS[expr[0]]
|
||||
separated = list(cls._separate(expr, delim, 1))
|
||||
|
||||
if len(separated) < 2:
|
||||
raise cls.Exception('No terminating paren {delim} in {expr!r:.5500}'.format(**locals()))
|
||||
return separated[0][1:].strip(), separated[1].strip()
|
||||
@@ -487,6 +490,7 @@ class JSInterpreter(object):
|
||||
# fails on (eg) if (...) stmt1; else stmt2;
|
||||
sub_statements = list(self._separate(stmt, ';')) or ['']
|
||||
expr = stmt = sub_statements.pop().strip()
|
||||
|
||||
for sub_stmt in sub_statements:
|
||||
ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion)
|
||||
if should_return:
|
||||
@@ -626,8 +630,7 @@ class JSInterpreter(object):
|
||||
if m.group('err'):
|
||||
catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err
|
||||
catch_vars = local_vars.new_child(m=catch_vars)
|
||||
err = None
|
||||
pending = self.interpret_statement(sub_expr, catch_vars, allow_recursion)
|
||||
err, pending = None, self.interpret_statement(sub_expr, catch_vars, allow_recursion)
|
||||
|
||||
m = self._FINALLY_RE.match(expr)
|
||||
if m:
|
||||
@@ -801,16 +804,19 @@ class JSInterpreter(object):
|
||||
if op in ('+', '-'):
|
||||
# simplify/adjust consecutive instances of these operators
|
||||
undone = 0
|
||||
while len(separated) > 1 and not separated[-1].strip():
|
||||
separated = [s.strip() for s in separated]
|
||||
while len(separated) > 1 and not separated[-1]:
|
||||
undone += 1
|
||||
separated.pop()
|
||||
if op == '-' and undone % 2 != 0:
|
||||
right_expr = op + right_expr
|
||||
elif op == '+':
|
||||
while len(separated) > 1 and separated[-1].strip() in self.OP_CHARS:
|
||||
while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS:
|
||||
right_expr = separated.pop() + right_expr
|
||||
if separated[-1][-1:] in self.OP_CHARS:
|
||||
right_expr = separated.pop() + right_expr
|
||||
# hanging op at end of left => unary + (strip) or - (push right)
|
||||
left_val = separated[-1]
|
||||
left_val = separated[-1] if separated else ''
|
||||
for dm_op in ('*', '%', '/', '**'):
|
||||
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
|
||||
if len(bodmas) > 1 and not bodmas[-1].strip():
|
||||
@@ -844,7 +850,7 @@ class JSInterpreter(object):
|
||||
memb = member
|
||||
raise self.Exception('{memb} {msg}'.format(**locals()), expr=expr)
|
||||
|
||||
def eval_method():
|
||||
def eval_method(variable, member):
|
||||
if (variable, member) == ('console', 'debug'):
|
||||
if Debugger.ENABLED:
|
||||
Debugger.write(self.interpret_expression('[{}]'.format(arg_str), local_vars, allow_recursion))
|
||||
@@ -852,6 +858,7 @@ class JSInterpreter(object):
|
||||
types = {
|
||||
'String': compat_str,
|
||||
'Math': float,
|
||||
'Array': list,
|
||||
}
|
||||
obj = local_vars.get(variable)
|
||||
if obj in (JS_Undefined, None):
|
||||
@@ -877,12 +884,29 @@ class JSInterpreter(object):
|
||||
self.interpret_expression(v, local_vars, allow_recursion)
|
||||
for v in self._separate(arg_str)]
|
||||
|
||||
if obj == compat_str:
|
||||
# Fixup prototype call
|
||||
if isinstance(obj, type):
|
||||
new_member, rest = member.partition('.')[0::2]
|
||||
if new_member == 'prototype':
|
||||
new_member, func_prototype = rest.partition('.')[0::2]
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
assertion(isinstance(argvals[0], obj), 'must bind to type {0}'.format(obj))
|
||||
if func_prototype == 'call':
|
||||
obj = argvals.pop(0)
|
||||
elif func_prototype == 'apply':
|
||||
assertion(len(argvals) == 2, 'takes two arguments')
|
||||
obj, argvals = argvals
|
||||
assertion(isinstance(argvals, list), 'second argument must be a list')
|
||||
else:
|
||||
raise self.Exception('Unsupported Function method ' + func_prototype, expr)
|
||||
member = new_member
|
||||
|
||||
if obj is compat_str:
|
||||
if member == 'fromCharCode':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
return ''.join(map(compat_chr, argvals))
|
||||
raise self.Exception('Unsupported string method ' + member, expr=expr)
|
||||
elif obj == float:
|
||||
elif obj is float:
|
||||
if member == 'pow':
|
||||
assertion(len(argvals) == 2, 'takes two arguments')
|
||||
return argvals[0] ** argvals[1]
|
||||
@@ -907,12 +931,12 @@ class JSInterpreter(object):
|
||||
elif member == 'splice':
|
||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
index, howMany = map(int, (argvals + [len(obj)])[:2])
|
||||
index, how_many = map(int, (argvals + [len(obj)])[:2])
|
||||
if index < 0:
|
||||
index += len(obj)
|
||||
add_items = argvals[2:]
|
||||
res = []
|
||||
for i in range(index, min(index + howMany, len(obj))):
|
||||
for _ in range(index, min(index + how_many, len(obj))):
|
||||
res.append(obj.pop(index))
|
||||
for i, item in enumerate(add_items):
|
||||
obj.insert(index + i, item)
|
||||
@@ -970,11 +994,11 @@ class JSInterpreter(object):
|
||||
|
||||
if remaining:
|
||||
ret, should_abort = self.interpret_statement(
|
||||
self._named_object(local_vars, eval_method()) + remaining,
|
||||
self._named_object(local_vars, eval_method(variable, member)) + remaining,
|
||||
local_vars, allow_recursion)
|
||||
return ret, should_return or should_abort
|
||||
else:
|
||||
return eval_method(), should_return
|
||||
return eval_method(variable, member), should_return
|
||||
|
||||
elif md.get('function'):
|
||||
fname = m.group('fname')
|
||||
@@ -1002,28 +1026,25 @@ class JSInterpreter(object):
|
||||
def extract_object(self, objname):
|
||||
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
|
||||
obj = {}
|
||||
fields = None
|
||||
for obj_m in re.finditer(
|
||||
fields = next(filter(None, (
|
||||
obj_m.group('fields') for obj_m in re.finditer(
|
||||
r'''(?xs)
|
||||
{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s*
|
||||
(?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*)
|
||||
}}\s*;
|
||||
'''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE),
|
||||
self.code):
|
||||
fields = obj_m.group('fields')
|
||||
if fields:
|
||||
break
|
||||
else:
|
||||
self.code))), None)
|
||||
if not fields:
|
||||
raise self.Exception('Could not find object ' + objname)
|
||||
# Currently, it only supports function definitions
|
||||
fields_m = re.finditer(
|
||||
r'''(?x)
|
||||
(?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
|
||||
''' % (_FUNC_NAME_RE, _NAME_RE),
|
||||
fields)
|
||||
for f in fields_m:
|
||||
for f in re.finditer(
|
||||
r'''(?x)
|
||||
(?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
|
||||
''' % (_FUNC_NAME_RE, _NAME_RE),
|
||||
fields):
|
||||
argnames = self.build_arglist(f.group('args'))
|
||||
obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code'))
|
||||
name = remove_quotes(f.group('key'))
|
||||
obj[name] = function_with_repr(self.build_function(argnames, f.group('code')), 'F<{0}>'.format(name))
|
||||
|
||||
return obj
|
||||
|
||||
@@ -1058,7 +1079,7 @@ class JSInterpreter(object):
|
||||
def extract_function(self, funcname):
|
||||
return function_with_repr(
|
||||
self.extract_function_from_code(*self.extract_function_code(funcname)),
|
||||
'F<%s>' % (funcname, ))
|
||||
'F<%s>' % (funcname,))
|
||||
|
||||
def extract_function_from_code(self, argnames, code, *global_stack):
|
||||
local_vars = {}
|
||||
@@ -1067,7 +1088,7 @@ class JSInterpreter(object):
|
||||
if mobj is None:
|
||||
break
|
||||
start, body_start = mobj.span()
|
||||
body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
|
||||
body, remaining = self._separate_at_paren(code[body_start - 1:])
|
||||
name = self._named_object(local_vars, self.extract_function_from_code(
|
||||
[x.strip() for x in mobj.group('args').split(',')],
|
||||
body, local_vars, *global_stack))
|
||||
@@ -1095,8 +1116,7 @@ class JSInterpreter(object):
|
||||
argnames = tuple(argnames)
|
||||
|
||||
def resf(args, kwargs={}, allow_recursion=100):
|
||||
global_stack[0].update(
|
||||
zip_longest(argnames, args, fillvalue=None))
|
||||
global_stack[0].update(zip_longest(argnames, args, fillvalue=None))
|
||||
global_stack[0].update(kwargs)
|
||||
var_stack = LocalNameSpace(*global_stack)
|
||||
ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
|
||||
|
||||
+34
-3
@@ -6604,27 +6604,53 @@ class _UnsafeExtensionError(Exception):
|
||||
),
|
||||
# video
|
||||
MEDIA_EXTENSIONS.video, (
|
||||
'avif',
|
||||
'asx',
|
||||
'ismv',
|
||||
'm2t',
|
||||
'm2ts',
|
||||
'm2v',
|
||||
'm4s',
|
||||
'mng',
|
||||
'mp2v',
|
||||
'mp4v',
|
||||
'mpe',
|
||||
'mpeg',
|
||||
'mpeg1',
|
||||
'mpeg2',
|
||||
'mpeg4',
|
||||
'mxf',
|
||||
'ogm',
|
||||
'qt',
|
||||
'rm',
|
||||
'swf',
|
||||
'ts',
|
||||
'vob',
|
||||
'vp9',
|
||||
'wvm',
|
||||
),
|
||||
# audio
|
||||
MEDIA_EXTENSIONS.audio, (
|
||||
'3ga',
|
||||
'ac3',
|
||||
'adts',
|
||||
'aif',
|
||||
'au',
|
||||
'dts',
|
||||
'isma',
|
||||
'it',
|
||||
'mid',
|
||||
'mod',
|
||||
'mpga',
|
||||
'mp1',
|
||||
'mp2',
|
||||
'mp4a',
|
||||
'mpa',
|
||||
'ra',
|
||||
'shn',
|
||||
'xm',
|
||||
),
|
||||
# image
|
||||
MEDIA_EXTENSIONS.thumbnails, (
|
||||
'avif',
|
||||
'bmp',
|
||||
'gif',
|
||||
'ico',
|
||||
@@ -6634,6 +6660,7 @@ class _UnsafeExtensionError(Exception):
|
||||
'jxl',
|
||||
'svg',
|
||||
'tif',
|
||||
'tiff',
|
||||
'wbmp',
|
||||
),
|
||||
# subtitle
|
||||
@@ -6641,10 +6668,15 @@ class _UnsafeExtensionError(Exception):
|
||||
'dfxp',
|
||||
'fs',
|
||||
'ismt',
|
||||
'json3',
|
||||
'sami',
|
||||
'scc',
|
||||
'srv1',
|
||||
'srv2',
|
||||
'srv3',
|
||||
'ssa',
|
||||
'tt',
|
||||
'xml',
|
||||
),
|
||||
# others
|
||||
MEDIA_EXTENSIONS.manifests,
|
||||
@@ -6658,7 +6690,6 @@ class _UnsafeExtensionError(Exception):
|
||||
# 'swp',
|
||||
# 'url',
|
||||
# 'webloc',
|
||||
# 'xml',
|
||||
)))
|
||||
|
||||
def __init__(self, extension):
|
||||
|
||||
Reference in New Issue
Block a user