1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2026-05-25 06:45:18 +00:00

Merge branch 'ytdl-org:master' into version-gnu-V

This commit is contained in:
tansy
2024-07-11 15:51:09 +02:00
committed by GitHub
5 changed files with 370 additions and 483 deletions
+10 -14
View File
@@ -1636,7 +1636,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
try:
jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
except ExtractorError as e:
raise ExtractorError('Unable to extract nsig jsi, player_id, func_codefunction code', cause=e)
raise ExtractorError('Unable to extract nsig function code', cause=e)
if self.get_param('youtube_print_sig_code'):
self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format(
player_id, func_code[1]))
@@ -1658,8 +1658,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_name(self, jscode):
func_name, idx = self._search_regex(
r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?\([\w$]+\)',
jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
# old: .get("n"))&&(b=nfunc[idx](b)
# older: .get("n"))&&(b=nfunc(b)
r'''(?x)
(?:\(\s*(?P<b>[a-z])\s*=\s*String\s*\.\s*fromCharCode\s*\(\s*110\s*\)\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)?
\.\s*get\s*\(\s*(?(b)(?P=b)|"n")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s*
(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
if not idx:
return func_name
@@ -1679,17 +1685,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
func_name = self._extract_n_function_name(jscode)
# For redundancy
func_code = self._search_regex(
r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
# NB: The end of the regex is intentionally kept strict
{(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name,
jscode, 'nsig function', group=('var', 'code'), default=None)
if func_code:
func_code = ([func_code[0]], func_code[1])
else:
self.write_debug('Extracting nsig function with jsinterp')
func_code = jsi.extract_function_code(func_name)
func_code = jsi.extract_function_code(func_name)
self.cache.store('youtube-nsig', player_id, func_code)
return jsi, player_id, func_code
+53 -33
View File
@@ -20,7 +20,9 @@ from .compat import (
compat_basestring,
compat_chr,
compat_collections_chain_map as ChainMap,
compat_filter as filter,
compat_itertools_zip_longest as zip_longest,
compat_map as map,
compat_str,
)
@@ -252,7 +254,7 @@ class Debugger(object):
cls.write('=> Raises:', e, '<-|', stmt, level=allow_recursion)
raise
if cls.ENABLED and stmt.strip():
if should_ret or not repr(ret) == stmt:
if should_ret or repr(ret) != stmt:
cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
return ret, should_ret
return interpret_statement
@@ -365,6 +367,8 @@ class JSInterpreter(object):
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
skipping = 0
if skip_delims:
skip_delims = variadic(skip_delims)
for idx, char in enumerate(expr):
paren_delta = 0
if not in_quote:
@@ -391,7 +395,7 @@ class JSInterpreter(object):
continue
elif pos == 0 and skip_delims:
here = expr[idx:]
for s in variadic(skip_delims):
for s in skip_delims:
if here.startswith(s) and s:
skipping = len(s) - 1
break
@@ -412,7 +416,6 @@ class JSInterpreter(object):
if delim is None:
delim = expr and _MATCHING_PARENS[expr[0]]
separated = list(cls._separate(expr, delim, 1))
if len(separated) < 2:
raise cls.Exception('No terminating paren {delim} in {expr!r:.5500}'.format(**locals()))
return separated[0][1:].strip(), separated[1].strip()
@@ -487,6 +490,7 @@ class JSInterpreter(object):
# fails on (eg) if (...) stmt1; else stmt2;
sub_statements = list(self._separate(stmt, ';')) or ['']
expr = stmt = sub_statements.pop().strip()
for sub_stmt in sub_statements:
ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion)
if should_return:
@@ -626,8 +630,7 @@ class JSInterpreter(object):
if m.group('err'):
catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err
catch_vars = local_vars.new_child(m=catch_vars)
err = None
pending = self.interpret_statement(sub_expr, catch_vars, allow_recursion)
err, pending = None, self.interpret_statement(sub_expr, catch_vars, allow_recursion)
m = self._FINALLY_RE.match(expr)
if m:
@@ -801,16 +804,19 @@ class JSInterpreter(object):
if op in ('+', '-'):
# simplify/adjust consecutive instances of these operators
undone = 0
while len(separated) > 1 and not separated[-1].strip():
separated = [s.strip() for s in separated]
while len(separated) > 1 and not separated[-1]:
undone += 1
separated.pop()
if op == '-' and undone % 2 != 0:
right_expr = op + right_expr
elif op == '+':
while len(separated) > 1 and separated[-1].strip() in self.OP_CHARS:
while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS:
right_expr = separated.pop() + right_expr
if separated[-1][-1:] in self.OP_CHARS:
right_expr = separated.pop() + right_expr
# hanging op at end of left => unary + (strip) or - (push right)
left_val = separated[-1]
left_val = separated[-1] if separated else ''
for dm_op in ('*', '%', '/', '**'):
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
if len(bodmas) > 1 and not bodmas[-1].strip():
@@ -844,7 +850,7 @@ class JSInterpreter(object):
memb = member
raise self.Exception('{memb} {msg}'.format(**locals()), expr=expr)
def eval_method():
def eval_method(variable, member):
if (variable, member) == ('console', 'debug'):
if Debugger.ENABLED:
Debugger.write(self.interpret_expression('[{}]'.format(arg_str), local_vars, allow_recursion))
@@ -852,6 +858,7 @@ class JSInterpreter(object):
types = {
'String': compat_str,
'Math': float,
'Array': list,
}
obj = local_vars.get(variable)
if obj in (JS_Undefined, None):
@@ -877,12 +884,29 @@ class JSInterpreter(object):
self.interpret_expression(v, local_vars, allow_recursion)
for v in self._separate(arg_str)]
if obj == compat_str:
# Fixup prototype call
if isinstance(obj, type):
new_member, rest = member.partition('.')[0::2]
if new_member == 'prototype':
new_member, func_prototype = rest.partition('.')[0::2]
assertion(argvals, 'takes one or more arguments')
assertion(isinstance(argvals[0], obj), 'must bind to type {0}'.format(obj))
if func_prototype == 'call':
obj = argvals.pop(0)
elif func_prototype == 'apply':
assertion(len(argvals) == 2, 'takes two arguments')
obj, argvals = argvals
assertion(isinstance(argvals, list), 'second argument must be a list')
else:
raise self.Exception('Unsupported Function method ' + func_prototype, expr)
member = new_member
if obj is compat_str:
if member == 'fromCharCode':
assertion(argvals, 'takes one or more arguments')
return ''.join(map(compat_chr, argvals))
raise self.Exception('Unsupported string method ' + member, expr=expr)
elif obj == float:
elif obj is float:
if member == 'pow':
assertion(len(argvals) == 2, 'takes two arguments')
return argvals[0] ** argvals[1]
@@ -907,12 +931,12 @@ class JSInterpreter(object):
elif member == 'splice':
assertion(isinstance(obj, list), 'must be applied on a list')
assertion(argvals, 'takes one or more arguments')
index, howMany = map(int, (argvals + [len(obj)])[:2])
index, how_many = map(int, (argvals + [len(obj)])[:2])
if index < 0:
index += len(obj)
add_items = argvals[2:]
res = []
for i in range(index, min(index + howMany, len(obj))):
for _ in range(index, min(index + how_many, len(obj))):
res.append(obj.pop(index))
for i, item in enumerate(add_items):
obj.insert(index + i, item)
@@ -970,11 +994,11 @@ class JSInterpreter(object):
if remaining:
ret, should_abort = self.interpret_statement(
self._named_object(local_vars, eval_method()) + remaining,
self._named_object(local_vars, eval_method(variable, member)) + remaining,
local_vars, allow_recursion)
return ret, should_return or should_abort
else:
return eval_method(), should_return
return eval_method(variable, member), should_return
elif md.get('function'):
fname = m.group('fname')
@@ -1002,28 +1026,25 @@ class JSInterpreter(object):
def extract_object(self, objname):
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
obj = {}
fields = None
for obj_m in re.finditer(
fields = next(filter(None, (
obj_m.group('fields') for obj_m in re.finditer(
r'''(?xs)
{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s*
(?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*)
}}\s*;
'''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE),
self.code):
fields = obj_m.group('fields')
if fields:
break
else:
self.code))), None)
if not fields:
raise self.Exception('Could not find object ' + objname)
# Currently, it only supports function definitions
fields_m = re.finditer(
r'''(?x)
(?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
''' % (_FUNC_NAME_RE, _NAME_RE),
fields)
for f in fields_m:
for f in re.finditer(
r'''(?x)
(?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
''' % (_FUNC_NAME_RE, _NAME_RE),
fields):
argnames = self.build_arglist(f.group('args'))
obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code'))
name = remove_quotes(f.group('key'))
obj[name] = function_with_repr(self.build_function(argnames, f.group('code')), 'F<{0}>'.format(name))
return obj
@@ -1058,7 +1079,7 @@ class JSInterpreter(object):
def extract_function(self, funcname):
return function_with_repr(
self.extract_function_from_code(*self.extract_function_code(funcname)),
'F<%s>' % (funcname, ))
'F<%s>' % (funcname,))
def extract_function_from_code(self, argnames, code, *global_stack):
local_vars = {}
@@ -1067,7 +1088,7 @@ class JSInterpreter(object):
if mobj is None:
break
start, body_start = mobj.span()
body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
body, remaining = self._separate_at_paren(code[body_start - 1:])
name = self._named_object(local_vars, self.extract_function_from_code(
[x.strip() for x in mobj.group('args').split(',')],
body, local_vars, *global_stack))
@@ -1095,8 +1116,7 @@ class JSInterpreter(object):
argnames = tuple(argnames)
def resf(args, kwargs={}, allow_recursion=100):
global_stack[0].update(
zip_longest(argnames, args, fillvalue=None))
global_stack[0].update(zip_longest(argnames, args, fillvalue=None))
global_stack[0].update(kwargs)
var_stack = LocalNameSpace(*global_stack)
ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
+34 -3
View File
@@ -6604,27 +6604,53 @@ class _UnsafeExtensionError(Exception):
),
# video
MEDIA_EXTENSIONS.video, (
'avif',
'asx',
'ismv',
'm2t',
'm2ts',
'm2v',
'm4s',
'mng',
'mp2v',
'mp4v',
'mpe',
'mpeg',
'mpeg1',
'mpeg2',
'mpeg4',
'mxf',
'ogm',
'qt',
'rm',
'swf',
'ts',
'vob',
'vp9',
'wvm',
),
# audio
MEDIA_EXTENSIONS.audio, (
'3ga',
'ac3',
'adts',
'aif',
'au',
'dts',
'isma',
'it',
'mid',
'mod',
'mpga',
'mp1',
'mp2',
'mp4a',
'mpa',
'ra',
'shn',
'xm',
),
# image
MEDIA_EXTENSIONS.thumbnails, (
'avif',
'bmp',
'gif',
'ico',
@@ -6634,6 +6660,7 @@ class _UnsafeExtensionError(Exception):
'jxl',
'svg',
'tif',
'tiff',
'wbmp',
),
# subtitle
@@ -6641,10 +6668,15 @@ class _UnsafeExtensionError(Exception):
'dfxp',
'fs',
'ismt',
'json3',
'sami',
'scc',
'srv1',
'srv2',
'srv3',
'ssa',
'tt',
'xml',
),
# others
MEDIA_EXTENSIONS.manifests,
@@ -6658,7 +6690,6 @@ class _UnsafeExtensionError(Exception):
# 'swp',
# 'url',
# 'webloc',
# 'xml',
)))
def __init__(self, extension):