mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-12-22 07:56:49 +00:00
Compare commits
41 Commits
c937458d80
...
4897a7e529
Author | SHA1 | Date | |
---|---|---|---|
|
4897a7e529 | ||
|
c5098961b0 | ||
|
dbc08fba83 | ||
|
71223bff39 | ||
|
e1b3fa242c | ||
|
451046d62a | ||
|
16f5bbc464 | ||
|
d35ce6ce95 | ||
|
76ac69917e | ||
|
756f6b45c7 | ||
|
43a74c5fa5 | ||
|
a452f9437c | ||
|
36801c62df | ||
|
f4b47754d9 | ||
|
37cea84f77 | ||
|
4652109643 | ||
|
3c466186a8 | ||
|
4d05f84325 | ||
|
e0094e63c3 | ||
|
fd8242e3ef | ||
|
ad01fa6cca | ||
|
2eac0fa379 | ||
|
0153b387e5 | ||
|
a48fe7491d | ||
|
e20ca543f0 | ||
|
e39466051f | ||
|
d95c0d203f | ||
|
3bde6a5752 | ||
|
50f6c5668a | ||
|
b4ff08bd2d | ||
|
88bd8b9f87 | ||
|
21924742f7 | ||
|
768ccccd9b | ||
|
eee9a247eb | ||
|
34484e49f5 | ||
|
06da64ee51 | ||
|
a08f2b7e45 | ||
|
668332b973 | ||
|
0b2ce3685e | ||
|
c2766cb80e | ||
|
eb38665438 |
3
.github/workflows/ci.yml
vendored
3
.github/workflows/ci.yml
vendored
@ -159,6 +159,9 @@ jobs:
|
|||||||
# wrap broken actions/setup-python@v4
|
# wrap broken actions/setup-python@v4
|
||||||
# NB may run apt-get install in Linux
|
# NB may run apt-get install in Linux
|
||||||
uses: ytdl-org/setup-python@v1
|
uses: ytdl-org/setup-python@v1
|
||||||
|
env:
|
||||||
|
# Temporary workaround for Python 3.5 failures - May 2024
|
||||||
|
PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org"
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
cache-build: true
|
cache-build: true
|
||||||
|
@ -255,12 +255,15 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
|||||||
--restrict-filenames Restrict filenames to only ASCII
|
--restrict-filenames Restrict filenames to only ASCII
|
||||||
characters, and avoid "&" and spaces in
|
characters, and avoid "&" and spaces in
|
||||||
filenames
|
filenames
|
||||||
-w, --no-overwrites Do not overwrite files
|
-w, --no-overwrites Do not overwrite files. (For the
|
||||||
|
opposite, see --no-continue.)
|
||||||
-c, --continue Force resume of partially downloaded
|
-c, --continue Force resume of partially downloaded
|
||||||
files. By default, youtube-dl will
|
files. By default, youtube-dl will
|
||||||
resume downloads if possible.
|
resume downloads if possible.
|
||||||
--no-continue Do not resume partially downloaded
|
--no-continue Do not resume partially downloaded
|
||||||
files (restart from beginning)
|
files: Instead, restart from the
|
||||||
|
beginning and ruthlessly overwrite
|
||||||
|
existing files.
|
||||||
--no-part Do not use .part files - write directly
|
--no-part Do not use .part files - write directly
|
||||||
into output file
|
into output file
|
||||||
--no-mtime Do not use the Last-modified header to
|
--no-mtime Do not use the Last-modified header to
|
||||||
|
@ -5,9 +5,9 @@ import hashlib
|
|||||||
import json
|
import json
|
||||||
import os.path
|
import os.path
|
||||||
import re
|
import re
|
||||||
import types
|
|
||||||
import ssl
|
import ssl
|
||||||
import sys
|
import sys
|
||||||
|
import types
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import youtube_dl.extractor
|
import youtube_dl.extractor
|
||||||
@ -181,18 +181,18 @@ def expect_value(self, got, expected, field):
|
|||||||
op, _, expected_num = expected.partition(':')
|
op, _, expected_num = expected.partition(':')
|
||||||
expected_num = int(expected_num)
|
expected_num = int(expected_num)
|
||||||
if op == 'mincount':
|
if op == 'mincount':
|
||||||
assert_func = assertGreaterEqual
|
assert_func = self.assertGreaterEqual
|
||||||
msg_tmpl = 'Expected %d items in field %s, but only got %d'
|
msg_tmpl = 'Expected %d items in field %s, but only got %d'
|
||||||
elif op == 'maxcount':
|
elif op == 'maxcount':
|
||||||
assert_func = assertLessEqual
|
assert_func = self.assertLessEqual
|
||||||
msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
|
msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
|
||||||
elif op == 'count':
|
elif op == 'count':
|
||||||
assert_func = assertEqual
|
assert_func = self.assertEqual
|
||||||
msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
|
msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
|
||||||
else:
|
else:
|
||||||
assert False
|
assert False
|
||||||
assert_func(
|
assert_func(
|
||||||
self, len(got), expected_num,
|
len(got), expected_num,
|
||||||
msg_tmpl % (expected_num, field, len(got)))
|
msg_tmpl % (expected_num, field, len(got)))
|
||||||
return
|
return
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
@ -262,27 +262,6 @@ def assertRegexpMatches(self, text, regexp, msg=None):
|
|||||||
self.assertTrue(m, msg)
|
self.assertTrue(m, msg)
|
||||||
|
|
||||||
|
|
||||||
def assertGreaterEqual(self, got, expected, msg=None):
|
|
||||||
if not (got >= expected):
|
|
||||||
if msg is None:
|
|
||||||
msg = '%r not greater than or equal to %r' % (got, expected)
|
|
||||||
self.assertTrue(got >= expected, msg)
|
|
||||||
|
|
||||||
|
|
||||||
def assertLessEqual(self, got, expected, msg=None):
|
|
||||||
if not (got <= expected):
|
|
||||||
if msg is None:
|
|
||||||
msg = '%r not less than or equal to %r' % (got, expected)
|
|
||||||
self.assertTrue(got <= expected, msg)
|
|
||||||
|
|
||||||
|
|
||||||
def assertEqual(self, got, expected, msg=None):
|
|
||||||
if not (got == expected):
|
|
||||||
if msg is None:
|
|
||||||
msg = '%r not equal to %r' % (got, expected)
|
|
||||||
self.assertTrue(got == expected, msg)
|
|
||||||
|
|
||||||
|
|
||||||
def expect_warnings(ydl, warnings_re):
|
def expect_warnings(ydl, warnings_re):
|
||||||
real_warning = ydl.report_warning
|
real_warning = ydl.report_warning
|
||||||
|
|
||||||
|
@ -153,6 +153,9 @@ class TestInfoExtractor(unittest.TestCase):
|
|||||||
'''
|
'''
|
||||||
search = self.ie._search_nextjs_data(html, 'testID')
|
search = self.ie._search_nextjs_data(html, 'testID')
|
||||||
self.assertEqual(search['props']['pageProps']['video']['id'], 'testid')
|
self.assertEqual(search['props']['pageProps']['video']['id'], 'testid')
|
||||||
|
search = self.ie._search_nextjs_data(
|
||||||
|
'no next.js data here, move along', 'testID', default={'status': 0})
|
||||||
|
self.assertEqual(search['status'], 0)
|
||||||
|
|
||||||
def test_search_nuxt_data(self):
|
def test_search_nuxt_data(self):
|
||||||
html = '''
|
html = '''
|
||||||
|
@ -9,7 +9,6 @@ import unittest
|
|||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import (
|
from test.helper import (
|
||||||
assertGreaterEqual,
|
|
||||||
expect_warnings,
|
expect_warnings,
|
||||||
get_params,
|
get_params,
|
||||||
gettestcases,
|
gettestcases,
|
||||||
@ -35,12 +34,20 @@ from youtube_dl.utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
format_bytes,
|
format_bytes,
|
||||||
|
IDENTITY,
|
||||||
|
preferredencoding,
|
||||||
UnavailableVideoError,
|
UnavailableVideoError,
|
||||||
)
|
)
|
||||||
from youtube_dl.extractor import get_info_extractor
|
from youtube_dl.extractor import get_info_extractor
|
||||||
|
|
||||||
RETRIES = 3
|
RETRIES = 3
|
||||||
|
|
||||||
|
# Some unittest APIs require actual str
|
||||||
|
if not isinstance('TEST', str):
|
||||||
|
_encode_str = lambda s: s.encode(preferredencoding())
|
||||||
|
else:
|
||||||
|
_encode_str = IDENTITY
|
||||||
|
|
||||||
|
|
||||||
class YoutubeDL(youtube_dl.YoutubeDL):
|
class YoutubeDL(youtube_dl.YoutubeDL):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
@ -101,7 +108,7 @@ def generator(test_case, tname):
|
|||||||
|
|
||||||
def print_skipping(reason):
|
def print_skipping(reason):
|
||||||
print('Skipping %s: %s' % (test_case['name'], reason))
|
print('Skipping %s: %s' % (test_case['name'], reason))
|
||||||
self.skipTest(reason)
|
self.skipTest(_encode_str(reason))
|
||||||
|
|
||||||
if not ie.working():
|
if not ie.working():
|
||||||
print_skipping('IE marked as not _WORKING')
|
print_skipping('IE marked as not _WORKING')
|
||||||
@ -122,7 +129,10 @@ def generator(test_case, tname):
|
|||||||
params['outtmpl'] = tname + '_' + params['outtmpl']
|
params['outtmpl'] = tname + '_' + params['outtmpl']
|
||||||
if is_playlist and 'playlist' not in test_case:
|
if is_playlist and 'playlist' not in test_case:
|
||||||
params.setdefault('extract_flat', 'in_playlist')
|
params.setdefault('extract_flat', 'in_playlist')
|
||||||
params.setdefault('playlistend', test_case.get('playlist_mincount'))
|
params.setdefault('playlistend',
|
||||||
|
test_case['playlist_maxcount'] + 1
|
||||||
|
if test_case.get('playlist_maxcount')
|
||||||
|
else test_case.get('playlist_mincount'))
|
||||||
params.setdefault('skip_download', True)
|
params.setdefault('skip_download', True)
|
||||||
|
|
||||||
ydl = YoutubeDL(params, auto_init=False)
|
ydl = YoutubeDL(params, auto_init=False)
|
||||||
@ -183,13 +193,19 @@ def generator(test_case, tname):
|
|||||||
expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
|
expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
|
||||||
|
|
||||||
if 'playlist_mincount' in test_case:
|
if 'playlist_mincount' in test_case:
|
||||||
assertGreaterEqual(
|
self.assertGreaterEqual(
|
||||||
self,
|
|
||||||
len(res_dict['entries']),
|
len(res_dict['entries']),
|
||||||
test_case['playlist_mincount'],
|
test_case['playlist_mincount'],
|
||||||
'Expected at least %d in playlist %s, but got only %d' % (
|
'Expected at least %d in playlist %s, but got only %d' % (
|
||||||
test_case['playlist_mincount'], test_case['url'],
|
test_case['playlist_mincount'], test_case['url'],
|
||||||
len(res_dict['entries'])))
|
len(res_dict['entries'])))
|
||||||
|
if 'playlist_maxcount' in test_case:
|
||||||
|
self.assertLessEqual(
|
||||||
|
len(res_dict['entries']),
|
||||||
|
test_case['playlist_maxcount'],
|
||||||
|
'Expected at most %d in playlist %s, but got %d' % (
|
||||||
|
test_case['playlist_maxcount'], test_case['url'],
|
||||||
|
len(res_dict['entries'])))
|
||||||
if 'playlist_count' in test_case:
|
if 'playlist_count' in test_case:
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
len(res_dict['entries']),
|
len(res_dict['entries']),
|
||||||
@ -231,8 +247,8 @@ def generator(test_case, tname):
|
|||||||
if params.get('test'):
|
if params.get('test'):
|
||||||
expected_minsize = max(expected_minsize, 10000)
|
expected_minsize = max(expected_minsize, 10000)
|
||||||
got_fsize = os.path.getsize(tc_filename)
|
got_fsize = os.path.getsize(tc_filename)
|
||||||
assertGreaterEqual(
|
self.assertGreaterEqual(
|
||||||
self, got_fsize, expected_minsize,
|
got_fsize, expected_minsize,
|
||||||
'Expected %s to be at least %s, but it\'s only %s ' %
|
'Expected %s to be at least %s, but it\'s only %s ' %
|
||||||
(tc_filename, format_bytes(expected_minsize),
|
(tc_filename, format_bytes(expected_minsize),
|
||||||
format_bytes(got_fsize)))
|
format_bytes(got_fsize)))
|
||||||
|
@ -11,194 +11,146 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||||||
import math
|
import math
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from youtube_dl.compat import compat_str
|
||||||
from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
|
from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
|
||||||
|
|
||||||
|
NaN = object()
|
||||||
|
|
||||||
|
|
||||||
class TestJSInterpreter(unittest.TestCase):
|
class TestJSInterpreter(unittest.TestCase):
|
||||||
|
def _test(self, jsi_or_code, expected, func='f', args=()):
|
||||||
|
if isinstance(jsi_or_code, compat_str):
|
||||||
|
jsi_or_code = JSInterpreter(jsi_or_code)
|
||||||
|
got = jsi_or_code.call_function(func, *args)
|
||||||
|
if expected is NaN:
|
||||||
|
self.assertTrue(math.isnan(got), '{0} is not NaN'.format(got))
|
||||||
|
else:
|
||||||
|
self.assertEqual(got, expected)
|
||||||
|
|
||||||
def test_basic(self):
|
def test_basic(self):
|
||||||
jsi = JSInterpreter('function x(){;}')
|
jsi = JSInterpreter('function f(){;}')
|
||||||
self.assertEqual(jsi.call_function('x'), None)
|
self.assertEqual(repr(jsi.extract_function('f')), 'F<f>')
|
||||||
self.assertEqual(repr(jsi.extract_function('x')), 'F<x>')
|
self._test(jsi, None)
|
||||||
|
|
||||||
jsi = JSInterpreter('function x3(){return 42;}')
|
self._test('function f(){return 42;}', 42)
|
||||||
self.assertEqual(jsi.call_function('x3'), 42)
|
self._test('function f(){42}', None)
|
||||||
|
self._test('var f = function(){return 42;}', 42)
|
||||||
jsi = JSInterpreter('function x3(){42}')
|
|
||||||
self.assertEqual(jsi.call_function('x3'), None)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('var x5 = function(){return 42;}')
|
|
||||||
self.assertEqual(jsi.call_function('x5'), 42)
|
|
||||||
|
|
||||||
def test_calc(self):
|
|
||||||
jsi = JSInterpreter('function x4(a){return 2*a+1;}')
|
|
||||||
self.assertEqual(jsi.call_function('x4', 3), 7)
|
|
||||||
|
|
||||||
def test_add(self):
|
def test_add(self):
|
||||||
jsi = JSInterpreter('function f(){return 42 + 7;}')
|
self._test('function f(){return 42 + 7;}', 49)
|
||||||
self.assertEqual(jsi.call_function('f'), 49)
|
self._test('function f(){return 42 + undefined;}', NaN)
|
||||||
jsi = JSInterpreter('function f(){return 42 + undefined;}')
|
self._test('function f(){return 42 + null;}', 42)
|
||||||
self.assertTrue(math.isnan(jsi.call_function('f')))
|
|
||||||
jsi = JSInterpreter('function f(){return 42 + null;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 42)
|
|
||||||
|
|
||||||
def test_sub(self):
|
def test_sub(self):
|
||||||
jsi = JSInterpreter('function f(){return 42 - 7;}')
|
self._test('function f(){return 42 - 7;}', 35)
|
||||||
self.assertEqual(jsi.call_function('f'), 35)
|
self._test('function f(){return 42 - undefined;}', NaN)
|
||||||
jsi = JSInterpreter('function f(){return 42 - undefined;}')
|
self._test('function f(){return 42 - null;}', 42)
|
||||||
self.assertTrue(math.isnan(jsi.call_function('f')))
|
|
||||||
jsi = JSInterpreter('function f(){return 42 - null;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 42)
|
|
||||||
|
|
||||||
def test_mul(self):
|
def test_mul(self):
|
||||||
jsi = JSInterpreter('function f(){return 42 * 7;}')
|
self._test('function f(){return 42 * 7;}', 294)
|
||||||
self.assertEqual(jsi.call_function('f'), 294)
|
self._test('function f(){return 42 * undefined;}', NaN)
|
||||||
jsi = JSInterpreter('function f(){return 42 * undefined;}')
|
self._test('function f(){return 42 * null;}', 0)
|
||||||
self.assertTrue(math.isnan(jsi.call_function('f')))
|
|
||||||
jsi = JSInterpreter('function f(){return 42 * null;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 0)
|
|
||||||
|
|
||||||
def test_div(self):
|
def test_div(self):
|
||||||
jsi = JSInterpreter('function f(a, b){return a / b;}')
|
jsi = JSInterpreter('function f(a, b){return a / b;}')
|
||||||
self.assertTrue(math.isnan(jsi.call_function('f', 0, 0)))
|
self._test(jsi, NaN, args=(0, 0))
|
||||||
self.assertTrue(math.isnan(jsi.call_function('f', JS_Undefined, 1)))
|
self._test(jsi, NaN, args=(JS_Undefined, 1))
|
||||||
self.assertTrue(math.isinf(jsi.call_function('f', 2, 0)))
|
self._test(jsi, float('inf'), args=(2, 0))
|
||||||
self.assertEqual(jsi.call_function('f', 0, 3), 0)
|
self._test(jsi, 0, args=(0, 3))
|
||||||
|
|
||||||
def test_mod(self):
|
def test_mod(self):
|
||||||
jsi = JSInterpreter('function f(){return 42 % 7;}')
|
self._test('function f(){return 42 % 7;}', 0)
|
||||||
self.assertEqual(jsi.call_function('f'), 0)
|
self._test('function f(){return 42 % 0;}', NaN)
|
||||||
jsi = JSInterpreter('function f(){return 42 % 0;}')
|
self._test('function f(){return 42 % undefined;}', NaN)
|
||||||
self.assertTrue(math.isnan(jsi.call_function('f')))
|
|
||||||
jsi = JSInterpreter('function f(){return 42 % undefined;}')
|
|
||||||
self.assertTrue(math.isnan(jsi.call_function('f')))
|
|
||||||
|
|
||||||
def test_exp(self):
|
def test_exp(self):
|
||||||
jsi = JSInterpreter('function f(){return 42 ** 2;}')
|
self._test('function f(){return 42 ** 2;}', 1764)
|
||||||
self.assertEqual(jsi.call_function('f'), 1764)
|
self._test('function f(){return 42 ** undefined;}', NaN)
|
||||||
jsi = JSInterpreter('function f(){return 42 ** undefined;}')
|
self._test('function f(){return 42 ** null;}', 1)
|
||||||
self.assertTrue(math.isnan(jsi.call_function('f')))
|
self._test('function f(){return undefined ** 42;}', NaN)
|
||||||
jsi = JSInterpreter('function f(){return 42 ** null;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 1)
|
def test_calc(self):
|
||||||
jsi = JSInterpreter('function f(){return undefined ** 42;}')
|
self._test('function f(a){return 2*a+1;}', 7, args=[3])
|
||||||
self.assertTrue(math.isnan(jsi.call_function('f')))
|
|
||||||
|
|
||||||
def test_empty_return(self):
|
def test_empty_return(self):
|
||||||
jsi = JSInterpreter('function f(){return; y()}')
|
self._test('function f(){return; y()}', None)
|
||||||
self.assertEqual(jsi.call_function('f'), None)
|
|
||||||
|
|
||||||
def test_morespace(self):
|
def test_morespace(self):
|
||||||
jsi = JSInterpreter('function x (a) { return 2 * a + 1 ; }')
|
self._test('function f (a) { return 2 * a + 1 ; }', 7, args=[3])
|
||||||
self.assertEqual(jsi.call_function('x', 3), 7)
|
self._test('function f () { x = 2 ; return x; }', 2)
|
||||||
|
|
||||||
jsi = JSInterpreter('function f () { x = 2 ; return x; }')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 2)
|
|
||||||
|
|
||||||
def test_strange_chars(self):
|
def test_strange_chars(self):
|
||||||
jsi = JSInterpreter('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }')
|
self._test('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }',
|
||||||
self.assertEqual(jsi.call_function('$_xY1', 20), 21)
|
21, args=[20], func='$_xY1')
|
||||||
|
|
||||||
def test_operators(self):
|
def test_operators(self):
|
||||||
jsi = JSInterpreter('function f(){return 1 << 5;}')
|
self._test('function f(){return 1 << 5;}', 32)
|
||||||
self.assertEqual(jsi.call_function('f'), 32)
|
self._test('function f(){return 2 ** 5}', 32)
|
||||||
|
self._test('function f(){return 19 & 21;}', 17)
|
||||||
jsi = JSInterpreter('function f(){return 2 ** 5}')
|
self._test('function f(){return 11 >> 2;}', 2)
|
||||||
self.assertEqual(jsi.call_function('f'), 32)
|
self._test('function f(){return []? 2+3: 4;}', 5)
|
||||||
|
self._test('function f(){return 1 == 2}', False)
|
||||||
jsi = JSInterpreter('function f(){return 19 & 21;}')
|
self._test('function f(){return 0 && 1 || 2;}', 2)
|
||||||
self.assertEqual(jsi.call_function('f'), 17)
|
self._test('function f(){return 0 ?? 42;}', 0)
|
||||||
|
self._test('function f(){return "life, the universe and everything" < 42;}', False)
|
||||||
jsi = JSInterpreter('function f(){return 11 >> 2;}')
|
# https://github.com/ytdl-org/youtube-dl/issues/32815
|
||||||
self.assertEqual(jsi.call_function('f'), 2)
|
self._test('function f(){return 0 - 7 * - 6;}', 42)
|
||||||
|
|
||||||
jsi = JSInterpreter('function f(){return []? 2+3: 4;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 5)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('function f(){return 1 == 2}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), False)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('function f(){return 0 && 1 || 2;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 2)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('function f(){return 0 ?? 42;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 0)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('function f(){return "life, the universe and everything" < 42;}')
|
|
||||||
self.assertFalse(jsi.call_function('f'))
|
|
||||||
|
|
||||||
def test_array_access(self):
|
def test_array_access(self):
|
||||||
jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}')
|
self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7])
|
||||||
self.assertEqual(jsi.call_function('f'), [5, 2, 7])
|
|
||||||
|
|
||||||
def test_parens(self):
|
def test_parens(self):
|
||||||
jsi = JSInterpreter('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}')
|
self._test('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}', 7)
|
||||||
self.assertEqual(jsi.call_function('f'), 7)
|
self._test('function f(){return (1 + 2) * 3;}', 9)
|
||||||
|
|
||||||
jsi = JSInterpreter('function f(){return (1 + 2) * 3;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 9)
|
|
||||||
|
|
||||||
def test_quotes(self):
|
def test_quotes(self):
|
||||||
jsi = JSInterpreter(r'function f(){return "a\"\\("}')
|
self._test(r'function f(){return "a\"\\("}', r'a"\(')
|
||||||
self.assertEqual(jsi.call_function('f'), r'a"\(')
|
|
||||||
|
|
||||||
def test_assignments(self):
|
def test_assignments(self):
|
||||||
jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}')
|
self._test('function f(){var x = 20; x = 30 + 1; return x;}', 31)
|
||||||
self.assertEqual(jsi.call_function('f'), 31)
|
self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51)
|
||||||
|
self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11)
|
||||||
jsi = JSInterpreter('function f(){var x = 20; x += 30 + 1; return x;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 51)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('function f(){var x = 20; x -= 30 + 1; return x;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), -11)
|
|
||||||
|
|
||||||
|
@unittest.skip('Not yet fully implemented')
|
||||||
def test_comments(self):
|
def test_comments(self):
|
||||||
'Skipping: Not yet fully implemented'
|
self._test('''
|
||||||
return
|
function f() {
|
||||||
jsi = JSInterpreter('''
|
var x = /* 1 + */ 2;
|
||||||
function x() {
|
var y = /* 30
|
||||||
var x = /* 1 + */ 2;
|
* 40 */ 50;
|
||||||
var y = /* 30
|
return x + y;
|
||||||
* 40 */ 50;
|
}
|
||||||
return x + y;
|
''', 52)
|
||||||
}
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 52)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
self._test('''
|
||||||
function f() {
|
function f() {
|
||||||
var x = "/*";
|
var x = "/*";
|
||||||
var y = 1 /* comment */ + 2;
|
var y = 1 /* comment */ + 2;
|
||||||
return y;
|
return y;
|
||||||
}
|
}
|
||||||
''')
|
''', 3)
|
||||||
self.assertEqual(jsi.call_function('f'), 3)
|
|
||||||
|
|
||||||
def test_precedence(self):
|
def test_precedence(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('''
|
||||||
function x() {
|
function f() {
|
||||||
var a = [10, 20, 30, 40, 50];
|
var a = [10, 20, 30, 40, 50];
|
||||||
var b = 6;
|
var b = 6;
|
||||||
a[0]=a[b%a.length];
|
a[0]=a[b%a.length];
|
||||||
return a;
|
return a;
|
||||||
}''')
|
}
|
||||||
self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
|
''', [20, 20, 30, 40, 50])
|
||||||
|
|
||||||
def test_builtins(self):
|
def test_builtins(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { return NaN }', NaN)
|
||||||
function x() { return NaN }
|
|
||||||
''')
|
|
||||||
self.assertTrue(math.isnan(jsi.call_function('x')))
|
|
||||||
|
|
||||||
def test_Date(self):
|
def test_Date(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { return new Date("Wednesday 31 December 1969 18:01:26 MDT") - 0; }', 86000)
|
||||||
function x(dt) { return new Date(dt) - 0; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
|
|
||||||
|
|
||||||
|
jsi = JSInterpreter('function f(dt) { return new Date(dt) - 0; }')
|
||||||
# date format m/d/y
|
# date format m/d/y
|
||||||
self.assertEqual(jsi.call_function('x', '12/31/1969 18:01:26 MDT'), 86000)
|
self._test(jsi, 86000, args=['12/31/1969 18:01:26 MDT'])
|
||||||
|
|
||||||
# epoch 0
|
# epoch 0
|
||||||
self.assertEqual(jsi.call_function('x', '1 January 1970 00:00:00 UTC'), 0)
|
self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC'])
|
||||||
|
|
||||||
def test_call(self):
|
def test_call(self):
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
@ -206,179 +158,115 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
function y(a) { return x() + (a?a:0); }
|
function y(a) { return x() + (a?a:0); }
|
||||||
function z() { return y(3); }
|
function z() { return y(3); }
|
||||||
''')
|
''')
|
||||||
self.assertEqual(jsi.call_function('z'), 5)
|
self._test(jsi, 5, func='z')
|
||||||
self.assertEqual(jsi.call_function('y'), 2)
|
self._test(jsi, 2, func='y')
|
||||||
|
|
||||||
def test_if(self):
|
def test_if(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('''
|
||||||
function x() {
|
function f() {
|
||||||
let a = 9;
|
let a = 9;
|
||||||
if (0==0) {a++}
|
if (0==0) {a++}
|
||||||
return a
|
return a
|
||||||
}''')
|
}
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
''', 10)
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
self._test('''
|
||||||
function x() {
|
function f() {
|
||||||
if (0==0) {return 10}
|
if (0==0) {return 10}
|
||||||
}''')
|
}
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
''', 10)
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
self._test('''
|
||||||
function x() {
|
function f() {
|
||||||
if (0!=0) {return 1}
|
if (0!=0) {return 1}
|
||||||
else {return 10}
|
else {return 10}
|
||||||
}''')
|
}
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
''', 10)
|
||||||
|
|
||||||
""" # Unsupported
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() {
|
|
||||||
if (0!=0) return 1;
|
|
||||||
else {return 10}
|
|
||||||
}''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
|
||||||
"""
|
|
||||||
|
|
||||||
def test_elseif(self):
|
def test_elseif(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('''
|
||||||
function x() {
|
function f() {
|
||||||
if (0!=0) {return 1}
|
if (0!=0) {return 1}
|
||||||
else if (1==0) {return 2}
|
else if (1==0) {return 2}
|
||||||
else {return 10}
|
else {return 10}
|
||||||
}''')
|
}
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
''', 10)
|
||||||
|
|
||||||
""" # Unsupported
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() {
|
|
||||||
if (0!=0) return 1;
|
|
||||||
else if (1==0) {return 2}
|
|
||||||
else {return 10}
|
|
||||||
}''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
|
||||||
# etc
|
|
||||||
"""
|
|
||||||
|
|
||||||
def test_for_loop(self):
|
def test_for_loop(self):
|
||||||
# function x() { a=0; for (i=0; i-10; i++) {a++} a }
|
self._test('function f() { a=0; for (i=0; i-10; i++) {a++} return a }', 10)
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { a=0; for (i=0; i-10; i++) {a++} return a }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
|
||||||
|
|
||||||
def test_while_loop(self):
|
def test_while_loop(self):
|
||||||
# function x() { a=0; while (a<10) {a++} a }
|
self._test('function f() { a=0; while (a<10) {a++} return a }', 10)
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { a=0; while (a<10) {a++} return a }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
|
||||||
|
|
||||||
def test_switch(self):
|
def test_switch(self):
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x(f) { switch(f){
|
function f(x) { switch(x){
|
||||||
case 1:f+=1;
|
case 1:x+=1;
|
||||||
case 2:f+=2;
|
case 2:x+=2;
|
||||||
case 3:f+=3;break;
|
case 3:x+=3;break;
|
||||||
case 4:f+=4;
|
case 4:x+=4;
|
||||||
default:f=0;
|
default:x=0;
|
||||||
} return f }
|
} return x }
|
||||||
''')
|
''')
|
||||||
self.assertEqual(jsi.call_function('x', 1), 7)
|
self._test(jsi, 7, args=[1])
|
||||||
self.assertEqual(jsi.call_function('x', 3), 6)
|
self._test(jsi, 6, args=[3])
|
||||||
self.assertEqual(jsi.call_function('x', 5), 0)
|
self._test(jsi, 0, args=[5])
|
||||||
|
|
||||||
def test_switch_default(self):
|
def test_switch_default(self):
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x(f) { switch(f){
|
function f(x) { switch(x){
|
||||||
case 2: f+=2;
|
case 2: x+=2;
|
||||||
default: f-=1;
|
default: x-=1;
|
||||||
case 5:
|
case 5:
|
||||||
case 6: f+=6;
|
case 6: x+=6;
|
||||||
case 0: break;
|
case 0: break;
|
||||||
case 1: f+=1;
|
case 1: x+=1;
|
||||||
} return f }
|
} return x }
|
||||||
''')
|
''')
|
||||||
self.assertEqual(jsi.call_function('x', 1), 2)
|
self._test(jsi, 2, args=[1])
|
||||||
self.assertEqual(jsi.call_function('x', 5), 11)
|
self._test(jsi, 11, args=[5])
|
||||||
self.assertEqual(jsi.call_function('x', 9), 14)
|
self._test(jsi, 14, args=[9])
|
||||||
|
|
||||||
def test_try(self):
|
def test_try(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { try{return 10} catch(e){return 5} }', 10)
|
||||||
function x() { try{return 10} catch(e){return 5} }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
|
||||||
|
|
||||||
def test_catch(self):
|
def test_catch(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { try{throw 10} catch(e){return 5} }', 5)
|
||||||
function x() { try{throw 10} catch(e){return 5} }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 5)
|
|
||||||
|
|
||||||
def test_finally(self):
|
def test_finally(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { try{throw 10} finally {return 42} }', 42)
|
||||||
function x() { try{throw 10} finally {return 42} }
|
self._test('function f() { try{throw 10} catch(e){return 5} finally {return 42} }', 42)
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 42)
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { try{throw 10} catch(e){return 5} finally {return 42} }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 42)
|
|
||||||
|
|
||||||
def test_nested_try(self):
|
def test_nested_try(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('''
|
||||||
function x() {try {
|
function f() {try {
|
||||||
try{throw 10} finally {throw 42}
|
try{throw 10} finally {throw 42}
|
||||||
} catch(e){return 5} }
|
} catch(e){return 5} }
|
||||||
''')
|
''', 5)
|
||||||
self.assertEqual(jsi.call_function('x'), 5)
|
|
||||||
|
|
||||||
def test_for_loop_continue(self):
|
def test_for_loop_continue(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }', 0)
|
||||||
function x() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 0)
|
|
||||||
|
|
||||||
def test_for_loop_break(self):
|
def test_for_loop_break(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { a=0; for (i=0; i-10; i++) { break; a++ } return a }', 0)
|
||||||
function x() { a=0; for (i=0; i-10; i++) { break; a++ } return a }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 0)
|
|
||||||
|
|
||||||
def test_for_loop_try(self):
|
def test_for_loop_try(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('''
|
||||||
function x() {
|
function f() {
|
||||||
for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} };
|
for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} };
|
||||||
return 42 }
|
return 42 }
|
||||||
''')
|
''', 42)
|
||||||
self.assertEqual(jsi.call_function('x'), 42)
|
|
||||||
|
|
||||||
def test_literal_list(self):
|
def test_literal_list(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { return [1, 2, "asdf", [5, 6, 7]][3] }', [5, 6, 7])
|
||||||
function x() { return [1, 2, "asdf", [5, 6, 7]][3] }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [5, 6, 7])
|
|
||||||
|
|
||||||
def test_comma(self):
|
def test_comma(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { a=5; a -= 1, a+=3; return a }', 7)
|
||||||
function x() { a=5; a -= 1, a+=3; return a }
|
self._test('function f() { a=5; return (a -= 1, a+=3, a); }', 7)
|
||||||
''')
|
self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5)
|
||||||
self.assertEqual(jsi.call_function('x'), 7)
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { a=5; return (a -= 1, a+=3, a); }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 7)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 5)
|
|
||||||
|
|
||||||
def test_void(self):
|
def test_void(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { return void 42; }', None)
|
||||||
function x() { return void 42; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), None)
|
|
||||||
|
|
||||||
def test_return_function(self):
|
def test_return_function(self):
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
@ -387,110 +275,60 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
self.assertEqual(jsi.call_function('x')([]), 1)
|
self.assertEqual(jsi.call_function('x')([]), 1)
|
||||||
|
|
||||||
def test_null(self):
|
def test_null(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { return null; }', None)
|
||||||
function x() { return null; }
|
self._test('function f() { return [null > 0, null < 0, null == 0, null === 0]; }',
|
||||||
''')
|
[False, False, False, False])
|
||||||
self.assertIs(jsi.call_function('x'), None)
|
self._test('function f() { return [null >= 0, null <= 0]; }', [True, True])
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { return [null > 0, null < 0, null == 0, null === 0]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [False, False, False, False])
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { return [null >= 0, null <= 0]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [True, True])
|
|
||||||
|
|
||||||
def test_undefined(self):
|
def test_undefined(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { return undefined === undefined; }', True)
|
||||||
function x() { return undefined === undefined; }
|
self._test('function f() { return undefined; }', JS_Undefined)
|
||||||
''')
|
self._test('function f() {return undefined ?? 42; }', 42)
|
||||||
self.assertTrue(jsi.call_function('x'))
|
self._test('function f() { let v; return v; }', JS_Undefined)
|
||||||
|
self._test('function f() { let v; return v**0; }', 1)
|
||||||
|
self._test('function f() { let v; return [v>42, v<=42, v&&42, 42&&v]; }',
|
||||||
|
[False, False, JS_Undefined, JS_Undefined])
|
||||||
|
|
||||||
|
self._test('''
|
||||||
|
function f() { return [
|
||||||
|
undefined === undefined,
|
||||||
|
undefined == undefined,
|
||||||
|
undefined == null
|
||||||
|
]; }
|
||||||
|
''', [True] * 3)
|
||||||
|
self._test('''
|
||||||
|
function f() { return [
|
||||||
|
undefined < undefined,
|
||||||
|
undefined > undefined,
|
||||||
|
undefined === 0,
|
||||||
|
undefined == 0,
|
||||||
|
undefined < 0,
|
||||||
|
undefined > 0,
|
||||||
|
undefined >= 0,
|
||||||
|
undefined <= 0,
|
||||||
|
undefined > null,
|
||||||
|
undefined < null,
|
||||||
|
undefined === null
|
||||||
|
]; }
|
||||||
|
''', [False] * 11)
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x() { return undefined; }
|
function x() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; }
|
||||||
''')
|
|
||||||
self.assertIs(jsi.call_function('x'), JS_Undefined)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { let v; return v; }
|
|
||||||
''')
|
|
||||||
self.assertIs(jsi.call_function('x'), JS_Undefined)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { return [undefined === undefined, undefined == undefined, undefined < undefined, undefined > undefined]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [True, True, False, False])
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { return [undefined === 0, undefined == 0, undefined < 0, undefined > 0]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [False, False, False, False])
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { return [undefined >= 0, undefined <= 0]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [False, False])
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { return [undefined > null, undefined < null, undefined == null, undefined === null]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [False, False, True, False])
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { return [undefined === null, undefined == null, undefined < null, undefined > null]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [False, True, False, False])
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; }
|
|
||||||
''')
|
''')
|
||||||
for y in jsi.call_function('x'):
|
for y in jsi.call_function('x'):
|
||||||
self.assertTrue(math.isnan(y))
|
self.assertTrue(math.isnan(y))
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { let v; return v**0; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 1)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { let v; return [v>42, v<=42, v&&42, 42&&v]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [False, False, JS_Undefined, JS_Undefined])
|
|
||||||
|
|
||||||
jsi = JSInterpreter('function x(){return undefined ?? 42; }')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 42)
|
|
||||||
|
|
||||||
def test_object(self):
|
def test_object(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { return {}; }', {})
|
||||||
function x() { return {}; }
|
self._test('function f() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }', [42, 0])
|
||||||
''')
|
self._test('function f() { let a; return a?.qq; }', JS_Undefined)
|
||||||
self.assertEqual(jsi.call_function('x'), {})
|
self._test('function f() { let a = {m1: 42, m2: 0 }; return a?.qq; }', JS_Undefined)
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [42, 0])
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { let a; return a?.qq; }
|
|
||||||
''')
|
|
||||||
self.assertIs(jsi.call_function('x'), JS_Undefined)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { let a = {m1: 42, m2: 0 }; return a?.qq; }
|
|
||||||
''')
|
|
||||||
self.assertIs(jsi.call_function('x'), JS_Undefined)
|
|
||||||
|
|
||||||
def test_regex(self):
|
def test_regex(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { let a=/,,[/,913,/](,)}/; }', None)
|
||||||
function x() { let a=/,,[/,913,/](,)}/; }
|
|
||||||
''')
|
|
||||||
self.assertIs(jsi.call_function('x'), None)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; }
|
function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; }
|
||||||
''')
|
''')
|
||||||
attrs = set(('findall', 'finditer', 'match', 'scanner', 'search',
|
attrs = set(('findall', 'finditer', 'match', 'scanner', 'search',
|
||||||
'split', 'sub', 'subn'))
|
'split', 'sub', 'subn'))
|
||||||
@ -500,92 +338,120 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
self.assertSetEqual(set(dir(jsi.call_function('x'))) & attrs, attrs)
|
self.assertSetEqual(set(dir(jsi.call_function('x'))) & attrs, attrs)
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x() { let a=/,,[/,913,/](,)}/i; return a; }
|
function x() { let a=/,,[/,913,/](,)}/i; return a; }
|
||||||
''')
|
''')
|
||||||
self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
|
self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
|
||||||
|
|
||||||
jsi = JSInterpreter(r'''
|
jsi = JSInterpreter(r'function f() { let a=/,][}",],()}(\[)/; return a; }')
|
||||||
function x() { let a="data-name".replace("data-", ""); return a }
|
self.assertEqual(jsi.call_function('f').pattern, r',][}",],()}(\[)')
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 'name')
|
|
||||||
|
|
||||||
jsi = JSInterpreter(r'''
|
jsi = JSInterpreter(r'function f() { let a=[/[)\\]/]; return a[0]; }')
|
||||||
function x() { let a="data-name".replace(new RegExp("^.+-"), ""); return a; }
|
self.assertEqual(jsi.call_function('f').pattern, r'[)\\]')
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 'name')
|
|
||||||
|
|
||||||
jsi = JSInterpreter(r'''
|
def test_replace(self):
|
||||||
function x() { let a="data-name".replace(/^.+-/, ""); return a; }
|
self._test('function f() { let a="data-name".replace("data-", ""); return a }',
|
||||||
''')
|
'name')
|
||||||
self.assertEqual(jsi.call_function('x'), 'name')
|
self._test('function f() { let a="data-name".replace(new RegExp("^.+-"), ""); return a; }',
|
||||||
|
'name')
|
||||||
jsi = JSInterpreter(r'''
|
self._test('function f() { let a="data-name".replace(/^.+-/, ""); return a; }',
|
||||||
function x() { let a="data-name".replace(/a/g, "o"); return a; }
|
'name')
|
||||||
''')
|
self._test('function f() { let a="data-name".replace(/a/g, "o"); return a; }',
|
||||||
self.assertEqual(jsi.call_function('x'), 'doto-nome')
|
'doto-nome')
|
||||||
|
self._test('function f() { let a="data-name".replaceAll("a", "o"); return a; }',
|
||||||
jsi = JSInterpreter(r'''
|
'doto-nome')
|
||||||
function x() { let a="data-name".replaceAll("a", "o"); return a; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 'doto-nome')
|
|
||||||
|
|
||||||
jsi = JSInterpreter(r'''
|
|
||||||
function x() { let a=[/[)\\]/]; return a[0]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x').pattern, r'[)\\]')
|
|
||||||
|
|
||||||
""" # fails
|
|
||||||
jsi = JSInterpreter(r'''
|
|
||||||
function x() { let a=100; a/=/[0-9]+/.exec('divide by 20 today')[0]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 5)
|
|
||||||
"""
|
|
||||||
|
|
||||||
def test_char_code_at(self):
|
def test_char_code_at(self):
|
||||||
jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}')
|
jsi = JSInterpreter('function f(i){return "test".charCodeAt(i)}')
|
||||||
self.assertEqual(jsi.call_function('x', 0), 116)
|
self._test(jsi, 116, args=[0])
|
||||||
self.assertEqual(jsi.call_function('x', 1), 101)
|
self._test(jsi, 101, args=[1])
|
||||||
self.assertEqual(jsi.call_function('x', 2), 115)
|
self._test(jsi, 115, args=[2])
|
||||||
self.assertEqual(jsi.call_function('x', 3), 116)
|
self._test(jsi, 116, args=[3])
|
||||||
self.assertEqual(jsi.call_function('x', 4), None)
|
self._test(jsi, None, args=[4])
|
||||||
self.assertEqual(jsi.call_function('x', 'not_a_number'), 116)
|
self._test(jsi, 116, args=['not_a_number'])
|
||||||
|
|
||||||
def test_bitwise_operators_overflow(self):
|
def test_bitwise_operators_overflow(self):
|
||||||
jsi = JSInterpreter('function x(){return -524999584 << 5}')
|
self._test('function f(){return -524999584 << 5}', 379882496)
|
||||||
self.assertEqual(jsi.call_function('x'), 379882496)
|
self._test('function f(){return 1236566549 << 5}', 915423904)
|
||||||
|
|
||||||
jsi = JSInterpreter('function x(){return 1236566549 << 5}')
|
def test_bitwise_operators_typecast(self):
|
||||||
self.assertEqual(jsi.call_function('x'), 915423904)
|
# madness
|
||||||
|
self._test('function f(){return null << 5}', 0)
|
||||||
|
self._test('function f(){return undefined >> 5}', 0)
|
||||||
|
self._test('function f(){return 42 << NaN}', 42)
|
||||||
|
self._test('function f(){return 42 << Infinity}', 42)
|
||||||
|
|
||||||
def test_bitwise_operators_madness(self):
|
def test_negative(self):
|
||||||
jsi = JSInterpreter('function x(){return null << 5}')
|
self._test('function f(){return 2 * -2.0 ;}', -4)
|
||||||
self.assertEqual(jsi.call_function('x'), 0)
|
self._test('function f(){return 2 - - -2 ;}', 0)
|
||||||
|
self._test('function f(){return 2 - - - -2 ;}', 4)
|
||||||
jsi = JSInterpreter('function x(){return undefined >> 5}')
|
self._test('function f(){return 2 - + + - -2;}', 0)
|
||||||
self.assertEqual(jsi.call_function('x'), 0)
|
self._test('function f(){return 2 + - + - -2;}', 0)
|
||||||
|
|
||||||
jsi = JSInterpreter('function x(){return 42 << NaN}')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 42)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('function x(){return 42 << Infinity}')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 42)
|
|
||||||
|
|
||||||
def test_32066(self):
|
def test_32066(self):
|
||||||
jsi = JSInterpreter("function x(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}")
|
self._test(
|
||||||
self.assertEqual(jsi.call_function('x'), 70)
|
"function f(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}",
|
||||||
|
70)
|
||||||
|
|
||||||
def test_unary_operators(self):
|
@unittest.skip('Not yet working')
|
||||||
jsi = JSInterpreter('function f(){return 2 - - - 2;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 0)
|
|
||||||
# fails
|
|
||||||
# jsi = JSInterpreter('function f(){return 2 + - + - - 2;}')
|
|
||||||
# self.assertEqual(jsi.call_function('f'), 0)
|
|
||||||
|
|
||||||
""" # fails so far
|
|
||||||
def test_packed(self):
|
def test_packed(self):
|
||||||
jsi = JSInterpreter('''function x(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''')
|
self._test(
|
||||||
self.assertEqual(jsi.call_function('x', '''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("<q />").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, .3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|')))
|
'''function f(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''',
|
||||||
"""
|
'''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("<q />").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, .3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|'))
|
||||||
|
|
||||||
|
def test_join(self):
|
||||||
|
test_input = list('test')
|
||||||
|
tests = [
|
||||||
|
'function f(a, b){return a.join(b)}',
|
||||||
|
'function f(a, b){return Array.prototype.join.call(a, b)}',
|
||||||
|
'function f(a, b){return Array.prototype.join.apply(a, [b])}',
|
||||||
|
]
|
||||||
|
for test in tests:
|
||||||
|
jsi = JSInterpreter(test)
|
||||||
|
self._test(jsi, 'test', args=[test_input, ''])
|
||||||
|
self._test(jsi, 't-e-s-t', args=[test_input, '-'])
|
||||||
|
self._test(jsi, '', args=[[], '-'])
|
||||||
|
|
||||||
|
def test_split(self):
|
||||||
|
test_result = list('test')
|
||||||
|
tests = [
|
||||||
|
'function f(a, b){return a.split(b)}',
|
||||||
|
'function f(a, b){return String.prototype.split.call(a, b)}',
|
||||||
|
'function f(a, b){return String.prototype.split.apply(a, [b])}',
|
||||||
|
]
|
||||||
|
for test in tests:
|
||||||
|
jsi = JSInterpreter(test)
|
||||||
|
self._test(jsi, test_result, args=['test', ''])
|
||||||
|
self._test(jsi, test_result, args=['t-e-s-t', '-'])
|
||||||
|
self._test(jsi, [''], args=['', '-'])
|
||||||
|
self._test(jsi, [], args=['', ''])
|
||||||
|
|
||||||
|
def test_slice(self):
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(5)}', [5, 6, 7, 8])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(99)}', [])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-2)}', [7, 8])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-99)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 0)}', [])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, 0)}', [])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 1)}', [0])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(3, 6)}', [3, 4, 5])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, -1)}', [1, 2, 3, 4, 5, 6, 7])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-1, 1)}', [])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-3, -1)}', [6, 7])
|
||||||
|
self._test('function f(){return "012345678".slice()}', '012345678')
|
||||||
|
self._test('function f(){return "012345678".slice(0)}', '012345678')
|
||||||
|
self._test('function f(){return "012345678".slice(5)}', '5678')
|
||||||
|
self._test('function f(){return "012345678".slice(99)}', '')
|
||||||
|
self._test('function f(){return "012345678".slice(-2)}', '78')
|
||||||
|
self._test('function f(){return "012345678".slice(-99)}', '012345678')
|
||||||
|
self._test('function f(){return "012345678".slice(0, 0)}', '')
|
||||||
|
self._test('function f(){return "012345678".slice(1, 0)}', '')
|
||||||
|
self._test('function f(){return "012345678".slice(0, 1)}', '0')
|
||||||
|
self._test('function f(){return "012345678".slice(3, 6)}', '345')
|
||||||
|
self._test('function f(){return "012345678".slice(1, -1)}', '1234567')
|
||||||
|
self._test('function f(){return "012345678".slice(-1, 1)}', '')
|
||||||
|
self._test('function f(){return "012345678".slice(-3, -1)}', '67')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
509
test/test_traversal.py
Normal file
509
test/test_traversal.py
Normal file
@ -0,0 +1,509 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from youtube_dl.traversal import (
|
||||||
|
dict_get,
|
||||||
|
get_first,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
)
|
||||||
|
from youtube_dl.compat import (
|
||||||
|
compat_etree_fromstring,
|
||||||
|
compat_http_cookies,
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
|
from youtube_dl.utils import (
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
_TEST_DATA = {
|
||||||
|
100: 100,
|
||||||
|
1.2: 1.2,
|
||||||
|
'str': 'str',
|
||||||
|
'None': None,
|
||||||
|
'...': Ellipsis,
|
||||||
|
'urls': [
|
||||||
|
{'index': 0, 'url': 'https://www.example.com/0'},
|
||||||
|
{'index': 1, 'url': 'https://www.example.com/1'},
|
||||||
|
],
|
||||||
|
'data': (
|
||||||
|
{'index': 2},
|
||||||
|
{'index': 3},
|
||||||
|
),
|
||||||
|
'dict': {},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if sys.version_info < (3, 0):
|
||||||
|
class _TestCase(unittest.TestCase):
|
||||||
|
|
||||||
|
def assertCountEqual(self, *args, **kwargs):
|
||||||
|
return self.assertItemsEqual(*args, **kwargs)
|
||||||
|
else:
|
||||||
|
_TestCase = unittest.TestCase
|
||||||
|
|
||||||
|
|
||||||
|
class TestTraversal(_TestCase):
|
||||||
|
def assertMaybeCountEqual(self, *args, **kwargs):
|
||||||
|
if sys.version_info < (3, 7):
|
||||||
|
# random dict order
|
||||||
|
return self.assertCountEqual(*args, **kwargs)
|
||||||
|
else:
|
||||||
|
return self.assertEqual(*args, **kwargs)
|
||||||
|
|
||||||
|
def test_traverse_obj(self):
|
||||||
|
# instant compat
|
||||||
|
str = compat_str
|
||||||
|
|
||||||
|
# define a pukka Iterable
|
||||||
|
def iter_range(stop):
|
||||||
|
for from_ in range(stop):
|
||||||
|
yield from_
|
||||||
|
|
||||||
|
# Test base functionality
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
|
||||||
|
msg='allow tuple path')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str',
|
||||||
|
msg='allow list path')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str',
|
||||||
|
msg='allow iterable path')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str',
|
||||||
|
msg='single items should be treated as a path')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA)
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, 100), 100)
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2)
|
||||||
|
|
||||||
|
# Test Ellipsis behavior
|
||||||
|
self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis),
|
||||||
|
(item for item in _TEST_DATA.values() if item not in (None, {})),
|
||||||
|
msg='`...` should give all non-discarded values')
|
||||||
|
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(),
|
||||||
|
msg='`...` selection for dicts should select all values')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')),
|
||||||
|
['https://www.example.com/0', 'https://www.example.com/1'],
|
||||||
|
msg='nested `...` queries should work')
|
||||||
|
self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), iter_range(4),
|
||||||
|
msg='`...` query result should be flattened')
|
||||||
|
self.assertEqual(traverse_obj(iter(range(4)), Ellipsis), list(range(4)),
|
||||||
|
msg='`...` should accept iterables')
|
||||||
|
|
||||||
|
# Test function as key
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
|
||||||
|
[_TEST_DATA['urls']],
|
||||||
|
msg='function as query key should perform a filter based on (key, value)')
|
||||||
|
self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), set(('str',)),
|
||||||
|
msg='exceptions in the query function should be caught')
|
||||||
|
self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
|
||||||
|
msg='function key should accept iterables')
|
||||||
|
if __debug__:
|
||||||
|
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
|
||||||
|
traverse_obj(_TEST_DATA, lambda a: Ellipsis)
|
||||||
|
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
|
||||||
|
traverse_obj(_TEST_DATA, lambda a, b, c: Ellipsis)
|
||||||
|
|
||||||
|
# Test set as key (transformation/type, like `expected_type`)
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper), )), ['STR'],
|
||||||
|
msg='Function in set should be a transformation')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ('fail', T(lambda _: 'const'))), 'const',
|
||||||
|
msg='Function in set should always be called')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str))), ['str'],
|
||||||
|
msg='Type in set should be a type filter')
|
||||||
|
self.assertMaybeCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str, int))), [100, 'str'],
|
||||||
|
msg='Multiple types in set should be a type filter')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, T(dict)), _TEST_DATA,
|
||||||
|
msg='A single set should be wrapped into a path')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper))), ['STR'],
|
||||||
|
msg='Transformation function should not raise')
|
||||||
|
self.assertMaybeCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str_or_none))),
|
||||||
|
[item for item in map(str_or_none, _TEST_DATA.values()) if item is not None],
|
||||||
|
msg='Function in set should be a transformation')
|
||||||
|
if __debug__:
|
||||||
|
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
|
||||||
|
traverse_obj(_TEST_DATA, set())
|
||||||
|
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
|
||||||
|
traverse_obj(_TEST_DATA, set((str.upper, str)))
|
||||||
|
|
||||||
|
# Test `slice` as a key
|
||||||
|
_SLICE_DATA = [0, 1, 2, 3, 4]
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None,
|
||||||
|
msg='slice on a dictionary should not throw')
|
||||||
|
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1],
|
||||||
|
msg='slice key should apply slice to sequence')
|
||||||
|
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2],
|
||||||
|
msg='slice key should apply slice to sequence')
|
||||||
|
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2],
|
||||||
|
msg='slice key should apply slice to sequence')
|
||||||
|
|
||||||
|
# Test alternative paths
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
|
||||||
|
msg='multiple `paths` should be treated as alternative paths')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
|
||||||
|
msg='alternatives should exit early')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
|
||||||
|
msg='alternatives should return `default` if exhausted')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, 'fail'), 100), 100,
|
||||||
|
msg='alternatives should track their own branching return')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', Ellipsis), ('data', Ellipsis)), list(_TEST_DATA['data']),
|
||||||
|
msg='alternatives on empty objects should search further')
|
||||||
|
|
||||||
|
# Test branch and path nesting
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
|
||||||
|
msg='tuple as key should be treated as branches')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'],
|
||||||
|
msg='list as key should be treated as branches')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'],
|
||||||
|
msg='double nesting in path should be treated as paths')
|
||||||
|
self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1],
|
||||||
|
msg='do not fail early on branching')
|
||||||
|
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))),
|
||||||
|
['https://www.example.com/0', 'https://www.example.com/1'],
|
||||||
|
msg='triple nesting in path should be treated as branches')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (Ellipsis, 'url')))),
|
||||||
|
['https://www.example.com/0', 'https://www.example.com/1'],
|
||||||
|
msg='ellipsis as branch path start gets flattened')
|
||||||
|
|
||||||
|
# Test dictionary as key
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2},
|
||||||
|
msg='dict key should result in a dict with the same keys')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}),
|
||||||
|
{0: 'https://www.example.com/0'},
|
||||||
|
msg='dict key should allow paths')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}),
|
||||||
|
{0: ['https://www.example.com/0']},
|
||||||
|
msg='tuple in dict path should be treated as branches')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}),
|
||||||
|
{0: ['https://www.example.com/0']},
|
||||||
|
msg='double nesting in dict path should be treated as paths')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
|
||||||
|
{0: ['https://www.example.com/1', 'https://www.example.com/0']},
|
||||||
|
msg='triple nesting in dict path should be treated as branches')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
|
||||||
|
msg='remove `None` values when top level dict key fails')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
|
||||||
|
msg='use `default` if key fails and `default`')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {},
|
||||||
|
msg='remove empty values when dict key')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: Ellipsis},
|
||||||
|
msg='use `default` when dict key and a default')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {},
|
||||||
|
msg='remove empty values when nested dict key fails')
|
||||||
|
self.assertEqual(traverse_obj(None, {0: 'fail'}), {},
|
||||||
|
msg='default to dict if pruned')
|
||||||
|
self.assertEqual(traverse_obj(None, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
|
||||||
|
msg='default to dict if pruned and default is given')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=Ellipsis), {0: {0: Ellipsis}},
|
||||||
|
msg='use nested `default` when nested dict key fails and `default`')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {},
|
||||||
|
msg='remove key if branch in dict key not successful')
|
||||||
|
|
||||||
|
# Testing default parameter behavior
|
||||||
|
_DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
|
||||||
|
msg='default value should be `None`')
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=Ellipsis), Ellipsis,
|
||||||
|
msg='chained fails should result in default')
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
|
||||||
|
msg='should not short cirquit on `None`')
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
|
||||||
|
msg='invalid dict key should result in `default`')
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
|
||||||
|
msg='`None` is a deliberate sentinel and should become `default`')
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
|
||||||
|
msg='`IndexError` should result in `default`')
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=1), 1,
|
||||||
|
msg='if branched but not successful return `default` if defined, not `[]`')
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=None), None,
|
||||||
|
msg='if branched but not successful return `default` even if `default` is `None`')
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail')), [],
|
||||||
|
msg='if branched but not successful return `[]`, not `default`')
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [],
|
||||||
|
msg='if branched but object is empty return `[]`, not `default`')
|
||||||
|
self.assertEqual(traverse_obj(None, Ellipsis), [],
|
||||||
|
msg='if branched but object is `None` return `[]`, not `default`')
|
||||||
|
self.assertEqual(traverse_obj({0: None}, (0, Ellipsis)), [],
|
||||||
|
msg='if branched but state is `None` return `[]`, not `default`')
|
||||||
|
|
||||||
|
branching_paths = [
|
||||||
|
('fail', Ellipsis),
|
||||||
|
(Ellipsis, 'fail'),
|
||||||
|
100 * ('fail',) + (Ellipsis,),
|
||||||
|
(Ellipsis,) + 100 * ('fail',),
|
||||||
|
]
|
||||||
|
for branching_path in branching_paths:
|
||||||
|
self.assertEqual(traverse_obj({}, branching_path), [],
|
||||||
|
msg='if branched but state is `None`, return `[]` (not `default`)')
|
||||||
|
self.assertEqual(traverse_obj({}, 'fail', branching_path), [],
|
||||||
|
msg='if branching in last alternative and previous did not match, return `[]` (not `default`)')
|
||||||
|
self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x',
|
||||||
|
msg='if branching in last alternative and previous did match, return single value')
|
||||||
|
self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x',
|
||||||
|
msg='if branching in first alternative and non-branching path does match, return single value')
|
||||||
|
self.assertEqual(traverse_obj({}, branching_path, 'fail'), None,
|
||||||
|
msg='if branching in first alternative and non-branching path does not match, return `default`')
|
||||||
|
|
||||||
|
# Testing expected_type behavior
|
||||||
|
_EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
|
||||||
|
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str),
|
||||||
|
'str', msg='accept matching `expected_type` type')
|
||||||
|
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int),
|
||||||
|
None, msg='reject non-matching `expected_type` type')
|
||||||
|
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)),
|
||||||
|
'0', msg='transform type using type function')
|
||||||
|
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0),
|
||||||
|
None, msg='wrap expected_type function in try_call')
|
||||||
|
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=str),
|
||||||
|
['str'], msg='eliminate items that expected_type fails on')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int),
|
||||||
|
{0: 100}, msg='type as expected_type should filter dict values')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
|
||||||
|
{0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, set((int_or_none,))), expected_type=int),
|
||||||
|
1, msg='expected_type should not filter non-final dict values')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int),
|
||||||
|
{0: {0: 100}}, msg='expected_type should transform deep dict values')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(Ellipsis)),
|
||||||
|
[{0: Ellipsis}, {0: Ellipsis}], msg='expected_type should transform branched dict values')
|
||||||
|
self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int),
|
||||||
|
[4], msg='expected_type regression for type matching in tuple branching')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ['data', Ellipsis], expected_type=int),
|
||||||
|
[], msg='expected_type regression for type matching in dict result')
|
||||||
|
|
||||||
|
# Test get_all behavior
|
||||||
|
_GET_ALL_DATA = {'key': [0, 1, 2]}
|
||||||
|
self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', Ellipsis), get_all=False), 0,
|
||||||
|
msg='if not `get_all`, return only first matching value')
|
||||||
|
self.assertEqual(traverse_obj(_GET_ALL_DATA, Ellipsis, get_all=False), [0, 1, 2],
|
||||||
|
msg='do not overflatten if not `get_all`')
|
||||||
|
|
||||||
|
# Test casesense behavior
|
||||||
|
_CASESENSE_DATA = {
|
||||||
|
'KeY': 'value0',
|
||||||
|
0: {
|
||||||
|
'KeY': 'value1',
|
||||||
|
0: {'KeY': 'value2'},
|
||||||
|
},
|
||||||
|
# FULLWIDTH LATIN CAPITAL LETTER K
|
||||||
|
'\uff2bey': 'value3',
|
||||||
|
}
|
||||||
|
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None,
|
||||||
|
msg='dict keys should be case sensitive unless `casesense`')
|
||||||
|
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY',
|
||||||
|
casesense=False), 'value0',
|
||||||
|
msg='allow non matching key case if `casesense`')
|
||||||
|
self.assertEqual(traverse_obj(_CASESENSE_DATA, '\uff4bey', # FULLWIDTH LATIN SMALL LETTER K
|
||||||
|
casesense=False), 'value3',
|
||||||
|
msg='allow non matching Unicode key case if `casesense`')
|
||||||
|
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)),
|
||||||
|
casesense=False), ['value1'],
|
||||||
|
msg='allow non matching key case in branch if `casesense`')
|
||||||
|
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)),
|
||||||
|
casesense=False), ['value2'],
|
||||||
|
msg='allow non matching key case in branch path if `casesense`')
|
||||||
|
|
||||||
|
# Test traverse_string behavior
|
||||||
|
_TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
|
||||||
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None,
|
||||||
|
msg='do not traverse into string if not `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0),
|
||||||
|
_traverse_string=True), 's',
|
||||||
|
msg='traverse into string if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1),
|
||||||
|
_traverse_string=True), '.',
|
||||||
|
msg='traverse into converted data if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis),
|
||||||
|
_traverse_string=True), 'str',
|
||||||
|
msg='`...` should result in string (same value) if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)),
|
||||||
|
_traverse_string=True), 'sr',
|
||||||
|
msg='`slice` should result in string if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == 's'),
|
||||||
|
_traverse_string=True), 'str',
|
||||||
|
msg='function should result in string if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
|
||||||
|
_traverse_string=True), ['s', 'r'],
|
||||||
|
msg='branching should result in list if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj({}, (0, Ellipsis), _traverse_string=True), [],
|
||||||
|
msg='branching should result in list if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj({}, (0, lambda x, y: True), _traverse_string=True), [],
|
||||||
|
msg='branching should result in list if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj({}, (0, slice(1)), _traverse_string=True), [],
|
||||||
|
msg='branching should result in list if `traverse_string`')
|
||||||
|
|
||||||
|
# Test re.Match as input obj
|
||||||
|
mobj = re.match(r'^0(12)(?P<group>3)(4)?$', '0123')
|
||||||
|
self.assertEqual(traverse_obj(mobj, Ellipsis), [x for x in mobj.groups() if x is not None],
|
||||||
|
msg='`...` on a `re.Match` should give its `groups()`')
|
||||||
|
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'],
|
||||||
|
msg='function on a `re.Match` should give groupno, value starting at 0')
|
||||||
|
self.assertEqual(traverse_obj(mobj, 'group'), '3',
|
||||||
|
msg='str key on a `re.Match` should give group with that name')
|
||||||
|
self.assertEqual(traverse_obj(mobj, 2), '3',
|
||||||
|
msg='int key on a `re.Match` should give group with that name')
|
||||||
|
self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3',
|
||||||
|
msg='str key on a `re.Match` should respect casesense')
|
||||||
|
self.assertEqual(traverse_obj(mobj, 'fail'), None,
|
||||||
|
msg='failing str key on a `re.Match` should return `default`')
|
||||||
|
self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None,
|
||||||
|
msg='failing str key on a `re.Match` should return `default`')
|
||||||
|
self.assertEqual(traverse_obj(mobj, 8), None,
|
||||||
|
msg='failing int key on a `re.Match` should return `default`')
|
||||||
|
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
|
||||||
|
msg='function on a `re.Match` should give group name as well')
|
||||||
|
|
||||||
|
# Test xml.etree.ElementTree.Element as input obj
|
||||||
|
etree = compat_etree_fromstring('''<?xml version="1.0"?>
|
||||||
|
<data>
|
||||||
|
<country name="Liechtenstein">
|
||||||
|
<rank>1</rank>
|
||||||
|
<year>2008</year>
|
||||||
|
<gdppc>141100</gdppc>
|
||||||
|
<neighbor name="Austria" direction="E"/>
|
||||||
|
<neighbor name="Switzerland" direction="W"/>
|
||||||
|
</country>
|
||||||
|
<country name="Singapore">
|
||||||
|
<rank>4</rank>
|
||||||
|
<year>2011</year>
|
||||||
|
<gdppc>59900</gdppc>
|
||||||
|
<neighbor name="Malaysia" direction="N"/>
|
||||||
|
</country>
|
||||||
|
<country name="Panama">
|
||||||
|
<rank>68</rank>
|
||||||
|
<year>2011</year>
|
||||||
|
<gdppc>13600</gdppc>
|
||||||
|
<neighbor name="Costa Rica" direction="W"/>
|
||||||
|
<neighbor name="Colombia" direction="E"/>
|
||||||
|
</country>
|
||||||
|
</data>''')
|
||||||
|
self.assertEqual(traverse_obj(etree, ''), etree,
|
||||||
|
msg='empty str key should return the element itself')
|
||||||
|
self.assertEqual(traverse_obj(etree, 'country'), list(etree),
|
||||||
|
msg='str key should return all children with that tag name')
|
||||||
|
self.assertEqual(traverse_obj(etree, Ellipsis), list(etree),
|
||||||
|
msg='`...` as key should return all children')
|
||||||
|
self.assertEqual(traverse_obj(etree, lambda _, x: x[0].text == '4'), [etree[1]],
|
||||||
|
msg='function as key should get element as value')
|
||||||
|
self.assertEqual(traverse_obj(etree, lambda i, _: i == 1), [etree[1]],
|
||||||
|
msg='function as key should get index as key')
|
||||||
|
self.assertEqual(traverse_obj(etree, 0), etree[0],
|
||||||
|
msg='int key should return the nth child')
|
||||||
|
self.assertEqual(traverse_obj(etree, './/neighbor/@name'),
|
||||||
|
['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia'],
|
||||||
|
msg='`@<attribute>` at end of path should give that attribute')
|
||||||
|
self.assertEqual(traverse_obj(etree, '//neighbor/@fail'), [None, None, None, None, None],
|
||||||
|
msg='`@<nonexistent>` at end of path should give `None`')
|
||||||
|
self.assertEqual(traverse_obj(etree, ('//neighbor/@', 2)), {'name': 'Malaysia', 'direction': 'N'},
|
||||||
|
msg='`@` should give the full attribute dict')
|
||||||
|
self.assertEqual(traverse_obj(etree, '//year/text()'), ['2008', '2011', '2011'],
|
||||||
|
msg='`text()` at end of path should give the inner text')
|
||||||
|
self.assertEqual(traverse_obj(etree, '//*[@direction]/@direction'), ['E', 'W', 'N', 'W', 'E'],
|
||||||
|
msg='full python xpath features should be supported')
|
||||||
|
self.assertEqual(traverse_obj(etree, (0, '@name')), 'Liechtenstein',
|
||||||
|
msg='special transformations should act on current element')
|
||||||
|
self.assertEqual(traverse_obj(etree, ('country', 0, Ellipsis, 'text()', T(int_or_none))), [1, 2008, 141100],
|
||||||
|
msg='special transformations should act on current element')
|
||||||
|
|
||||||
|
def test_traversal_unbranching(self):
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [(100, 1.2), all]), [100, 1.2],
|
||||||
|
msg='`all` should give all results as list')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [(100, 1.2), any]), 100,
|
||||||
|
msg='`any` should give the first result')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [100, all]), [100],
|
||||||
|
msg='`all` should give list if non branching')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [100, any]), 100,
|
||||||
|
msg='`any` should give single item if non branching')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100), all]), [100],
|
||||||
|
msg='`all` should filter `None` and empty dict')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100), any]), 100,
|
||||||
|
msg='`any` should filter `None` and empty dict')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [{
|
||||||
|
'all': [('dict', 'None', 100, 1.2), all],
|
||||||
|
'any': [('dict', 'None', 100, 1.2), any],
|
||||||
|
}]), {'all': [100, 1.2], 'any': 100},
|
||||||
|
msg='`all`/`any` should apply to each dict path separately')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [{
|
||||||
|
'all': [('dict', 'None', 100, 1.2), all],
|
||||||
|
'any': [('dict', 'None', 100, 1.2), any],
|
||||||
|
}], get_all=False), {'all': [100, 1.2], 'any': 100},
|
||||||
|
msg='`all`/`any` should apply to dict regardless of `get_all`')
|
||||||
|
self.assertIs(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, T(float)]), None,
|
||||||
|
msg='`all` should reset branching status')
|
||||||
|
self.assertIs(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), any, T(float)]), None,
|
||||||
|
msg='`any` should reset branching status')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, Ellipsis, T(float)]), [1.2],
|
||||||
|
msg='`all` should allow further branching')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 'urls', 'data'), any, Ellipsis, 'index']), [0, 1],
|
||||||
|
msg='`any` should allow further branching')
|
||||||
|
|
||||||
|
def test_traversal_morsel(self):
|
||||||
|
values = {
|
||||||
|
'expires': 'a',
|
||||||
|
'path': 'b',
|
||||||
|
'comment': 'c',
|
||||||
|
'domain': 'd',
|
||||||
|
'max-age': 'e',
|
||||||
|
'secure': 'f',
|
||||||
|
'httponly': 'g',
|
||||||
|
'version': 'h',
|
||||||
|
'samesite': 'i',
|
||||||
|
}
|
||||||
|
# SameSite added in Py3.8, breaks .update for 3.5-3.7
|
||||||
|
if sys.version_info < (3, 8):
|
||||||
|
del values['samesite']
|
||||||
|
morsel = compat_http_cookies.Morsel()
|
||||||
|
morsel.set(str('item_key'), 'item_value', 'coded_value')
|
||||||
|
morsel.update(values)
|
||||||
|
values['key'] = str('item_key')
|
||||||
|
values['value'] = 'item_value'
|
||||||
|
values = dict((str(k), v) for k, v in values.items())
|
||||||
|
# make test pass even without ordered dict
|
||||||
|
value_set = set(values.values())
|
||||||
|
|
||||||
|
for key, value in values.items():
|
||||||
|
self.assertEqual(traverse_obj(morsel, key), value,
|
||||||
|
msg='Morsel should provide access to all values')
|
||||||
|
self.assertEqual(set(traverse_obj(morsel, Ellipsis)), value_set,
|
||||||
|
msg='`...` should yield all values')
|
||||||
|
self.assertEqual(set(traverse_obj(morsel, lambda k, v: True)), value_set,
|
||||||
|
msg='function key should yield all values')
|
||||||
|
self.assertIs(traverse_obj(morsel, [(None,), any]), morsel,
|
||||||
|
msg='Morsel should not be implicitly changed to dict on usage')
|
||||||
|
|
||||||
|
def test_get_first(self):
|
||||||
|
self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam')
|
||||||
|
|
||||||
|
def test_dict_get(self):
|
||||||
|
FALSE_VALUES = {
|
||||||
|
'none': None,
|
||||||
|
'false': False,
|
||||||
|
'zero': 0,
|
||||||
|
'empty_string': '',
|
||||||
|
'empty_list': [],
|
||||||
|
}
|
||||||
|
d = FALSE_VALUES.copy()
|
||||||
|
d['a'] = 42
|
||||||
|
self.assertEqual(dict_get(d, 'a'), 42)
|
||||||
|
self.assertEqual(dict_get(d, 'b'), None)
|
||||||
|
self.assertEqual(dict_get(d, 'b', 42), 42)
|
||||||
|
self.assertEqual(dict_get(d, ('a', )), 42)
|
||||||
|
self.assertEqual(dict_get(d, ('b', 'a', )), 42)
|
||||||
|
self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42)
|
||||||
|
self.assertEqual(dict_get(d, ('b', 'c', )), None)
|
||||||
|
self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42)
|
||||||
|
for key, false_value in FALSE_VALUES.items():
|
||||||
|
self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
|
||||||
|
self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
@ -14,10 +14,11 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||||||
import io
|
import io
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
import re
|
import types
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from youtube_dl.utils import (
|
from youtube_dl.utils import (
|
||||||
|
_UnsafeExtensionError,
|
||||||
age_restricted,
|
age_restricted,
|
||||||
args_to_str,
|
args_to_str,
|
||||||
base_url,
|
base_url,
|
||||||
@ -28,7 +29,6 @@ from youtube_dl.utils import (
|
|||||||
DateRange,
|
DateRange,
|
||||||
detect_exe_version,
|
detect_exe_version,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
dict_get,
|
|
||||||
encode_base_n,
|
encode_base_n,
|
||||||
encode_compat_str,
|
encode_compat_str,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
@ -44,7 +44,6 @@ from youtube_dl.utils import (
|
|||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
get_elements_by_class,
|
get_elements_by_class,
|
||||||
get_elements_by_attribute,
|
get_elements_by_attribute,
|
||||||
get_first,
|
|
||||||
InAdvancePagedList,
|
InAdvancePagedList,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
@ -84,14 +83,11 @@ from youtube_dl.utils import (
|
|||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
shell_quote,
|
shell_quote,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_or_none,
|
|
||||||
str_to_int,
|
str_to_int,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
subtitles_filename,
|
subtitles_filename,
|
||||||
T,
|
|
||||||
timeconvert,
|
timeconvert,
|
||||||
traverse_obj,
|
|
||||||
try_call,
|
try_call,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
@ -132,10 +128,6 @@ from youtube_dl.compat import (
|
|||||||
|
|
||||||
class TestUtil(unittest.TestCase):
|
class TestUtil(unittest.TestCase):
|
||||||
|
|
||||||
# yt-dlp shim
|
|
||||||
def assertCountEqual(self, expected, got, msg='count should be the same'):
|
|
||||||
return self.assertEqual(len(tuple(expected)), len(tuple(got)), msg=msg)
|
|
||||||
|
|
||||||
def test_timeconvert(self):
|
def test_timeconvert(self):
|
||||||
self.assertTrue(timeconvert('') is None)
|
self.assertTrue(timeconvert('') is None)
|
||||||
self.assertTrue(timeconvert('bougrg') is None)
|
self.assertTrue(timeconvert('bougrg') is None)
|
||||||
@ -280,6 +272,27 @@ class TestUtil(unittest.TestCase):
|
|||||||
expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
|
expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
|
||||||
'%s/expanded' % compat_getenv('HOME'))
|
'%s/expanded' % compat_getenv('HOME'))
|
||||||
|
|
||||||
|
_uncommon_extensions = [
|
||||||
|
('exe', 'abc.exe.ext'),
|
||||||
|
('de', 'abc.de.ext'),
|
||||||
|
('../.mp4', None),
|
||||||
|
('..\\.mp4', None),
|
||||||
|
]
|
||||||
|
|
||||||
|
def assertUnsafeExtension(self, ext=None):
|
||||||
|
assert_raises = self.assertRaises(_UnsafeExtensionError)
|
||||||
|
assert_raises.ext = ext
|
||||||
|
orig_exit = assert_raises.__exit__
|
||||||
|
|
||||||
|
def my_exit(self_, exc_type, exc_val, exc_tb):
|
||||||
|
did_raise = orig_exit(exc_type, exc_val, exc_tb)
|
||||||
|
if did_raise and assert_raises.ext is not None:
|
||||||
|
self.assertEqual(assert_raises.ext, assert_raises.exception.extension, 'Unsafe extension not as unexpected')
|
||||||
|
return did_raise
|
||||||
|
|
||||||
|
assert_raises.__exit__ = types.MethodType(my_exit, assert_raises)
|
||||||
|
return assert_raises
|
||||||
|
|
||||||
def test_prepend_extension(self):
|
def test_prepend_extension(self):
|
||||||
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
|
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
|
||||||
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
|
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
|
||||||
@ -288,6 +301,19 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp')
|
self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp')
|
||||||
self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext')
|
self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext')
|
||||||
|
|
||||||
|
# Test uncommon extensions
|
||||||
|
self.assertEqual(prepend_extension('abc.ext', 'bin'), 'abc.bin.ext')
|
||||||
|
for ext, result in self._uncommon_extensions:
|
||||||
|
with self.assertUnsafeExtension(ext):
|
||||||
|
prepend_extension('abc', ext)
|
||||||
|
if result:
|
||||||
|
self.assertEqual(prepend_extension('abc.ext', ext, 'ext'), result)
|
||||||
|
else:
|
||||||
|
with self.assertUnsafeExtension(ext):
|
||||||
|
prepend_extension('abc.ext', ext, 'ext')
|
||||||
|
with self.assertUnsafeExtension(ext):
|
||||||
|
prepend_extension('abc.unexpected_ext', ext, 'ext')
|
||||||
|
|
||||||
def test_replace_extension(self):
|
def test_replace_extension(self):
|
||||||
self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
|
self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
|
||||||
self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
|
self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
|
||||||
@ -296,6 +322,16 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
|
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
|
||||||
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
|
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
|
||||||
|
|
||||||
|
# Test uncommon extensions
|
||||||
|
self.assertEqual(replace_extension('abc.ext', 'bin'), 'abc.unknown_video')
|
||||||
|
for ext, _ in self._uncommon_extensions:
|
||||||
|
with self.assertUnsafeExtension(ext):
|
||||||
|
replace_extension('abc', ext)
|
||||||
|
with self.assertUnsafeExtension(ext):
|
||||||
|
replace_extension('abc.ext', ext, 'ext')
|
||||||
|
with self.assertUnsafeExtension(ext):
|
||||||
|
replace_extension('abc.unexpected_ext', ext, 'ext')
|
||||||
|
|
||||||
def test_subtitles_filename(self):
|
def test_subtitles_filename(self):
|
||||||
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
|
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
|
||||||
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
|
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
|
||||||
@ -740,28 +776,6 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertRaises(
|
self.assertRaises(
|
||||||
ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
|
ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
|
||||||
|
|
||||||
def test_dict_get(self):
|
|
||||||
FALSE_VALUES = {
|
|
||||||
'none': None,
|
|
||||||
'false': False,
|
|
||||||
'zero': 0,
|
|
||||||
'empty_string': '',
|
|
||||||
'empty_list': [],
|
|
||||||
}
|
|
||||||
d = FALSE_VALUES.copy()
|
|
||||||
d['a'] = 42
|
|
||||||
self.assertEqual(dict_get(d, 'a'), 42)
|
|
||||||
self.assertEqual(dict_get(d, 'b'), None)
|
|
||||||
self.assertEqual(dict_get(d, 'b', 42), 42)
|
|
||||||
self.assertEqual(dict_get(d, ('a', )), 42)
|
|
||||||
self.assertEqual(dict_get(d, ('b', 'a', )), 42)
|
|
||||||
self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42)
|
|
||||||
self.assertEqual(dict_get(d, ('b', 'c', )), None)
|
|
||||||
self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42)
|
|
||||||
for key, false_value in FALSE_VALUES.items():
|
|
||||||
self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
|
|
||||||
self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
|
|
||||||
|
|
||||||
def test_merge_dicts(self):
|
def test_merge_dicts(self):
|
||||||
self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2})
|
self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2})
|
||||||
self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1})
|
self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1})
|
||||||
@ -1703,336 +1717,6 @@ Line 1
|
|||||||
self.assertEqual(variadic('spam', allowed_types=dict), 'spam')
|
self.assertEqual(variadic('spam', allowed_types=dict), 'spam')
|
||||||
self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam')
|
self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam')
|
||||||
|
|
||||||
def test_traverse_obj(self):
|
|
||||||
str = compat_str
|
|
||||||
_TEST_DATA = {
|
|
||||||
100: 100,
|
|
||||||
1.2: 1.2,
|
|
||||||
'str': 'str',
|
|
||||||
'None': None,
|
|
||||||
'...': Ellipsis,
|
|
||||||
'urls': [
|
|
||||||
{'index': 0, 'url': 'https://www.example.com/0'},
|
|
||||||
{'index': 1, 'url': 'https://www.example.com/1'},
|
|
||||||
],
|
|
||||||
'data': (
|
|
||||||
{'index': 2},
|
|
||||||
{'index': 3},
|
|
||||||
),
|
|
||||||
'dict': {},
|
|
||||||
}
|
|
||||||
|
|
||||||
# define a pukka Iterable
|
|
||||||
def iter_range(stop):
|
|
||||||
for from_ in range(stop):
|
|
||||||
yield from_
|
|
||||||
|
|
||||||
# Test base functionality
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
|
|
||||||
msg='allow tuple path')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str',
|
|
||||||
msg='allow list path')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str',
|
|
||||||
msg='allow iterable path')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str',
|
|
||||||
msg='single items should be treated as a path')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA)
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 100), 100)
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2)
|
|
||||||
|
|
||||||
# Test Ellipsis behavior
|
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis),
|
|
||||||
(item for item in _TEST_DATA.values() if item not in (None, {})),
|
|
||||||
msg='`...` should give all non-discarded values')
|
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(),
|
|
||||||
msg='`...` selection for dicts should select all values')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')),
|
|
||||||
['https://www.example.com/0', 'https://www.example.com/1'],
|
|
||||||
msg='nested `...` queries should work')
|
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), iter_range(4),
|
|
||||||
msg='`...` query result should be flattened')
|
|
||||||
self.assertEqual(traverse_obj(iter(range(4)), Ellipsis), list(range(4)),
|
|
||||||
msg='`...` should accept iterables')
|
|
||||||
|
|
||||||
# Test function as key
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
|
|
||||||
[_TEST_DATA['urls']],
|
|
||||||
msg='function as query key should perform a filter based on (key, value)')
|
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), set(('str',)),
|
|
||||||
msg='exceptions in the query function should be caught')
|
|
||||||
self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
|
|
||||||
msg='function key should accept iterables')
|
|
||||||
if __debug__:
|
|
||||||
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
|
|
||||||
traverse_obj(_TEST_DATA, lambda a: Ellipsis)
|
|
||||||
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
|
|
||||||
traverse_obj(_TEST_DATA, lambda a, b, c: Ellipsis)
|
|
||||||
|
|
||||||
# Test set as key (transformation/type, like `expected_type`)
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper), )), ['STR'],
|
|
||||||
msg='Function in set should be a transformation')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str))), ['str'],
|
|
||||||
msg='Type in set should be a type filter')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, T(dict)), _TEST_DATA,
|
|
||||||
msg='A single set should be wrapped into a path')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper))), ['STR'],
|
|
||||||
msg='Transformation function should not raise')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str_or_none))),
|
|
||||||
[item for item in map(str_or_none, _TEST_DATA.values()) if item is not None],
|
|
||||||
msg='Function in set should be a transformation')
|
|
||||||
if __debug__:
|
|
||||||
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
|
|
||||||
traverse_obj(_TEST_DATA, set())
|
|
||||||
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
|
|
||||||
traverse_obj(_TEST_DATA, set((str.upper, str)))
|
|
||||||
|
|
||||||
# Test `slice` as a key
|
|
||||||
_SLICE_DATA = [0, 1, 2, 3, 4]
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None,
|
|
||||||
msg='slice on a dictionary should not throw')
|
|
||||||
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1],
|
|
||||||
msg='slice key should apply slice to sequence')
|
|
||||||
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2],
|
|
||||||
msg='slice key should apply slice to sequence')
|
|
||||||
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2],
|
|
||||||
msg='slice key should apply slice to sequence')
|
|
||||||
|
|
||||||
# Test alternative paths
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
|
|
||||||
msg='multiple `paths` should be treated as alternative paths')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
|
|
||||||
msg='alternatives should exit early')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
|
|
||||||
msg='alternatives should return `default` if exhausted')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, 'fail'), 100), 100,
|
|
||||||
msg='alternatives should track their own branching return')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', Ellipsis), ('data', Ellipsis)), list(_TEST_DATA['data']),
|
|
||||||
msg='alternatives on empty objects should search further')
|
|
||||||
|
|
||||||
# Test branch and path nesting
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
|
|
||||||
msg='tuple as key should be treated as branches')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'],
|
|
||||||
msg='list as key should be treated as branches')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'],
|
|
||||||
msg='double nesting in path should be treated as paths')
|
|
||||||
self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1],
|
|
||||||
msg='do not fail early on branching')
|
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))),
|
|
||||||
['https://www.example.com/0', 'https://www.example.com/1'],
|
|
||||||
msg='triple nesting in path should be treated as branches')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (Ellipsis, 'url')))),
|
|
||||||
['https://www.example.com/0', 'https://www.example.com/1'],
|
|
||||||
msg='ellipsis as branch path start gets flattened')
|
|
||||||
|
|
||||||
# Test dictionary as key
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2},
|
|
||||||
msg='dict key should result in a dict with the same keys')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}),
|
|
||||||
{0: 'https://www.example.com/0'},
|
|
||||||
msg='dict key should allow paths')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}),
|
|
||||||
{0: ['https://www.example.com/0']},
|
|
||||||
msg='tuple in dict path should be treated as branches')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}),
|
|
||||||
{0: ['https://www.example.com/0']},
|
|
||||||
msg='double nesting in dict path should be treated as paths')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
|
|
||||||
{0: ['https://www.example.com/1', 'https://www.example.com/0']},
|
|
||||||
msg='triple nesting in dict path should be treated as branches')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
|
|
||||||
msg='remove `None` values when top level dict key fails')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
|
|
||||||
msg='use `default` if key fails and `default`')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {},
|
|
||||||
msg='remove empty values when dict key')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: Ellipsis},
|
|
||||||
msg='use `default` when dict key and a default')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {},
|
|
||||||
msg='remove empty values when nested dict key fails')
|
|
||||||
self.assertEqual(traverse_obj(None, {0: 'fail'}), {},
|
|
||||||
msg='default to dict if pruned')
|
|
||||||
self.assertEqual(traverse_obj(None, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
|
|
||||||
msg='default to dict if pruned and default is given')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=Ellipsis), {0: {0: Ellipsis}},
|
|
||||||
msg='use nested `default` when nested dict key fails and `default`')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {},
|
|
||||||
msg='remove key if branch in dict key not successful')
|
|
||||||
|
|
||||||
# Testing default parameter behavior
|
|
||||||
_DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
|
|
||||||
msg='default value should be `None`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=Ellipsis), Ellipsis,
|
|
||||||
msg='chained fails should result in default')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
|
|
||||||
msg='should not short cirquit on `None`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
|
|
||||||
msg='invalid dict key should result in `default`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
|
|
||||||
msg='`None` is a deliberate sentinel and should become `default`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
|
|
||||||
msg='`IndexError` should result in `default`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=1), 1,
|
|
||||||
msg='if branched but not successful return `default` if defined, not `[]`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=None), None,
|
|
||||||
msg='if branched but not successful return `default` even if `default` is `None`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail')), [],
|
|
||||||
msg='if branched but not successful return `[]`, not `default`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [],
|
|
||||||
msg='if branched but object is empty return `[]`, not `default`')
|
|
||||||
self.assertEqual(traverse_obj(None, Ellipsis), [],
|
|
||||||
msg='if branched but object is `None` return `[]`, not `default`')
|
|
||||||
self.assertEqual(traverse_obj({0: None}, (0, Ellipsis)), [],
|
|
||||||
msg='if branched but state is `None` return `[]`, not `default`')
|
|
||||||
|
|
||||||
branching_paths = [
|
|
||||||
('fail', Ellipsis),
|
|
||||||
(Ellipsis, 'fail'),
|
|
||||||
100 * ('fail',) + (Ellipsis,),
|
|
||||||
(Ellipsis,) + 100 * ('fail',),
|
|
||||||
]
|
|
||||||
for branching_path in branching_paths:
|
|
||||||
self.assertEqual(traverse_obj({}, branching_path), [],
|
|
||||||
msg='if branched but state is `None`, return `[]` (not `default`)')
|
|
||||||
self.assertEqual(traverse_obj({}, 'fail', branching_path), [],
|
|
||||||
msg='if branching in last alternative and previous did not match, return `[]` (not `default`)')
|
|
||||||
self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x',
|
|
||||||
msg='if branching in last alternative and previous did match, return single value')
|
|
||||||
self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x',
|
|
||||||
msg='if branching in first alternative and non-branching path does match, return single value')
|
|
||||||
self.assertEqual(traverse_obj({}, branching_path, 'fail'), None,
|
|
||||||
msg='if branching in first alternative and non-branching path does not match, return `default`')
|
|
||||||
|
|
||||||
# Testing expected_type behavior
|
|
||||||
_EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
|
|
||||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str),
|
|
||||||
'str', msg='accept matching `expected_type` type')
|
|
||||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int),
|
|
||||||
None, msg='reject non-matching `expected_type` type')
|
|
||||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)),
|
|
||||||
'0', msg='transform type using type function')
|
|
||||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0),
|
|
||||||
None, msg='wrap expected_type function in try_call')
|
|
||||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=str),
|
|
||||||
['str'], msg='eliminate items that expected_type fails on')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int),
|
|
||||||
{0: 100}, msg='type as expected_type should filter dict values')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
|
|
||||||
{0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, set((int_or_none,))), expected_type=int),
|
|
||||||
1, msg='expected_type should not filter non-final dict values')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int),
|
|
||||||
{0: {0: 100}}, msg='expected_type should transform deep dict values')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(Ellipsis)),
|
|
||||||
[{0: Ellipsis}, {0: Ellipsis}], msg='expected_type should transform branched dict values')
|
|
||||||
self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int),
|
|
||||||
[4], msg='expected_type regression for type matching in tuple branching')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ['data', Ellipsis], expected_type=int),
|
|
||||||
[], msg='expected_type regression for type matching in dict result')
|
|
||||||
|
|
||||||
# Test get_all behavior
|
|
||||||
_GET_ALL_DATA = {'key': [0, 1, 2]}
|
|
||||||
self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', Ellipsis), get_all=False), 0,
|
|
||||||
msg='if not `get_all`, return only first matching value')
|
|
||||||
self.assertEqual(traverse_obj(_GET_ALL_DATA, Ellipsis, get_all=False), [0, 1, 2],
|
|
||||||
msg='do not overflatten if not `get_all`')
|
|
||||||
|
|
||||||
# Test casesense behavior
|
|
||||||
_CASESENSE_DATA = {
|
|
||||||
'KeY': 'value0',
|
|
||||||
0: {
|
|
||||||
'KeY': 'value1',
|
|
||||||
0: {'KeY': 'value2'},
|
|
||||||
},
|
|
||||||
# FULLWIDTH LATIN CAPITAL LETTER K
|
|
||||||
'\uff2bey': 'value3',
|
|
||||||
}
|
|
||||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None,
|
|
||||||
msg='dict keys should be case sensitive unless `casesense`')
|
|
||||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY',
|
|
||||||
casesense=False), 'value0',
|
|
||||||
msg='allow non matching key case if `casesense`')
|
|
||||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, '\uff4bey', # FULLWIDTH LATIN SMALL LETTER K
|
|
||||||
casesense=False), 'value3',
|
|
||||||
msg='allow non matching Unicode key case if `casesense`')
|
|
||||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)),
|
|
||||||
casesense=False), ['value1'],
|
|
||||||
msg='allow non matching key case in branch if `casesense`')
|
|
||||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)),
|
|
||||||
casesense=False), ['value2'],
|
|
||||||
msg='allow non matching key case in branch path if `casesense`')
|
|
||||||
|
|
||||||
# Test traverse_string behavior
|
|
||||||
_TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
|
|
||||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None,
|
|
||||||
msg='do not traverse into string if not `traverse_string`')
|
|
||||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0),
|
|
||||||
_traverse_string=True), 's',
|
|
||||||
msg='traverse into string if `traverse_string`')
|
|
||||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1),
|
|
||||||
_traverse_string=True), '.',
|
|
||||||
msg='traverse into converted data if `traverse_string`')
|
|
||||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis),
|
|
||||||
_traverse_string=True), 'str',
|
|
||||||
msg='`...` should result in string (same value) if `traverse_string`')
|
|
||||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)),
|
|
||||||
_traverse_string=True), 'sr',
|
|
||||||
msg='`slice` should result in string if `traverse_string`')
|
|
||||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == 's'),
|
|
||||||
_traverse_string=True), 'str',
|
|
||||||
msg='function should result in string if `traverse_string`')
|
|
||||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
|
|
||||||
_traverse_string=True), ['s', 'r'],
|
|
||||||
msg='branching should result in list if `traverse_string`')
|
|
||||||
self.assertEqual(traverse_obj({}, (0, Ellipsis), _traverse_string=True), [],
|
|
||||||
msg='branching should result in list if `traverse_string`')
|
|
||||||
self.assertEqual(traverse_obj({}, (0, lambda x, y: True), _traverse_string=True), [],
|
|
||||||
msg='branching should result in list if `traverse_string`')
|
|
||||||
self.assertEqual(traverse_obj({}, (0, slice(1)), _traverse_string=True), [],
|
|
||||||
msg='branching should result in list if `traverse_string`')
|
|
||||||
|
|
||||||
# Test is_user_input behavior
|
|
||||||
_IS_USER_INPUT_DATA = {'range8': list(range(8))}
|
|
||||||
self.assertEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3'),
|
|
||||||
_is_user_input=True), 3,
|
|
||||||
msg='allow for string indexing if `is_user_input`')
|
|
||||||
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3:'),
|
|
||||||
_is_user_input=True), tuple(range(8))[3:],
|
|
||||||
msg='allow for string slice if `is_user_input`')
|
|
||||||
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':4:2'),
|
|
||||||
_is_user_input=True), tuple(range(8))[:4:2],
|
|
||||||
msg='allow step in string slice if `is_user_input`')
|
|
||||||
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':'),
|
|
||||||
_is_user_input=True), range(8),
|
|
||||||
msg='`:` should be treated as `...` if `is_user_input`')
|
|
||||||
with self.assertRaises(TypeError, msg='too many params should result in error'):
|
|
||||||
traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':::'), _is_user_input=True)
|
|
||||||
|
|
||||||
# Test re.Match as input obj
|
|
||||||
mobj = re.match(r'^0(12)(?P<group>3)(4)?$', '0123')
|
|
||||||
self.assertEqual(traverse_obj(mobj, Ellipsis), [x for x in mobj.groups() if x is not None],
|
|
||||||
msg='`...` on a `re.Match` should give its `groups()`')
|
|
||||||
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'],
|
|
||||||
msg='function on a `re.Match` should give groupno, value starting at 0')
|
|
||||||
self.assertEqual(traverse_obj(mobj, 'group'), '3',
|
|
||||||
msg='str key on a `re.Match` should give group with that name')
|
|
||||||
self.assertEqual(traverse_obj(mobj, 2), '3',
|
|
||||||
msg='int key on a `re.Match` should give group with that name')
|
|
||||||
self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3',
|
|
||||||
msg='str key on a `re.Match` should respect casesense')
|
|
||||||
self.assertEqual(traverse_obj(mobj, 'fail'), None,
|
|
||||||
msg='failing str key on a `re.Match` should return `default`')
|
|
||||||
self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None,
|
|
||||||
msg='failing str key on a `re.Match` should return `default`')
|
|
||||||
self.assertEqual(traverse_obj(mobj, 8), None,
|
|
||||||
msg='failing int key on a `re.Match` should return `default`')
|
|
||||||
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
|
|
||||||
msg='function on a `re.Match` should give group name as well')
|
|
||||||
|
|
||||||
def test_get_first(self):
|
|
||||||
self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam')
|
|
||||||
|
|
||||||
def test_join_nonempty(self):
|
def test_join_nonempty(self):
|
||||||
self.assertEqual(join_nonempty('a', 'b'), 'a-b')
|
self.assertEqual(join_nonempty('a', 'b'), 'a-b')
|
||||||
self.assertEqual(join_nonempty(
|
self.assertEqual(join_nonempty(
|
||||||
|
@ -158,6 +158,30 @@ _NSIG_TESTS = [
|
|||||||
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
|
||||||
'_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
|
'_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js',
|
||||||
|
'1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js',
|
||||||
|
'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js',
|
||||||
|
'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
|
||||||
|
'7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
|
||||||
|
'-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
|
||||||
|
'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ import collections
|
|||||||
import copy
|
import copy
|
||||||
import datetime
|
import datetime
|
||||||
import errno
|
import errno
|
||||||
|
import functools
|
||||||
import io
|
import io
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
@ -53,6 +54,7 @@ from .compat import (
|
|||||||
compat_urllib_request_DataHandler,
|
compat_urllib_request_DataHandler,
|
||||||
)
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
|
_UnsafeExtensionError,
|
||||||
age_restricted,
|
age_restricted,
|
||||||
args_to_str,
|
args_to_str,
|
||||||
bug_reports_message,
|
bug_reports_message,
|
||||||
@ -129,6 +131,20 @@ if compat_os_name == 'nt':
|
|||||||
import ctypes
|
import ctypes
|
||||||
|
|
||||||
|
|
||||||
|
def _catch_unsafe_file_extension(func):
|
||||||
|
@functools.wraps(func)
|
||||||
|
def wrapper(self, *args, **kwargs):
|
||||||
|
try:
|
||||||
|
return func(self, *args, **kwargs)
|
||||||
|
except _UnsafeExtensionError as error:
|
||||||
|
self.report_error(
|
||||||
|
'{0} found; to avoid damaging your system, this value is disallowed.'
|
||||||
|
' If you believe this is an error{1}'.format(
|
||||||
|
error_to_compat_str(error), bug_reports_message(',')))
|
||||||
|
|
||||||
|
return wrapper
|
||||||
|
|
||||||
|
|
||||||
class YoutubeDL(object):
|
class YoutubeDL(object):
|
||||||
"""YoutubeDL class.
|
"""YoutubeDL class.
|
||||||
|
|
||||||
@ -1039,8 +1055,8 @@ class YoutubeDL(object):
|
|||||||
elif result_type in ('playlist', 'multi_video'):
|
elif result_type in ('playlist', 'multi_video'):
|
||||||
# Protect from infinite recursion due to recursively nested playlists
|
# Protect from infinite recursion due to recursively nested playlists
|
||||||
# (see https://github.com/ytdl-org/youtube-dl/issues/27833)
|
# (see https://github.com/ytdl-org/youtube-dl/issues/27833)
|
||||||
webpage_url = ie_result['webpage_url']
|
webpage_url = ie_result.get('webpage_url') # not all pl/mv have this
|
||||||
if webpage_url in self._playlist_urls:
|
if webpage_url and webpage_url in self._playlist_urls:
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
'[download] Skipping already downloaded playlist: %s'
|
'[download] Skipping already downloaded playlist: %s'
|
||||||
% ie_result.get('title') or ie_result.get('id'))
|
% ie_result.get('title') or ie_result.get('id'))
|
||||||
@ -1048,6 +1064,10 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
self._playlist_level += 1
|
self._playlist_level += 1
|
||||||
self._playlist_urls.add(webpage_url)
|
self._playlist_urls.add(webpage_url)
|
||||||
|
new_result = dict((k, v) for k, v in extra_info.items() if k not in ie_result)
|
||||||
|
if new_result:
|
||||||
|
new_result.update(ie_result)
|
||||||
|
ie_result = new_result
|
||||||
try:
|
try:
|
||||||
return self.__process_playlist(ie_result, download)
|
return self.__process_playlist(ie_result, download)
|
||||||
finally:
|
finally:
|
||||||
@ -1593,6 +1613,28 @@ class YoutubeDL(object):
|
|||||||
self.cookiejar.add_cookie_header(pr)
|
self.cookiejar.add_cookie_header(pr)
|
||||||
return pr.get_header('Cookie')
|
return pr.get_header('Cookie')
|
||||||
|
|
||||||
|
def _fill_common_fields(self, info_dict, final=True):
|
||||||
|
|
||||||
|
for ts_key, date_key in (
|
||||||
|
('timestamp', 'upload_date'),
|
||||||
|
('release_timestamp', 'release_date'),
|
||||||
|
):
|
||||||
|
if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
|
||||||
|
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
||||||
|
# see http://bugs.python.org/issue1646728)
|
||||||
|
try:
|
||||||
|
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
|
||||||
|
info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d'))
|
||||||
|
except (ValueError, OverflowError, OSError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Auto generate title fields corresponding to the *_number fields when missing
|
||||||
|
# in order to always have clean titles. This is very common for TV series.
|
||||||
|
if final:
|
||||||
|
for field in ('chapter', 'season', 'episode'):
|
||||||
|
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
||||||
|
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
||||||
|
|
||||||
def process_video_result(self, info_dict, download=True):
|
def process_video_result(self, info_dict, download=True):
|
||||||
assert info_dict.get('_type', 'video') == 'video'
|
assert info_dict.get('_type', 'video') == 'video'
|
||||||
|
|
||||||
@ -1660,24 +1702,7 @@ class YoutubeDL(object):
|
|||||||
if 'display_id' not in info_dict and 'id' in info_dict:
|
if 'display_id' not in info_dict and 'id' in info_dict:
|
||||||
info_dict['display_id'] = info_dict['id']
|
info_dict['display_id'] = info_dict['id']
|
||||||
|
|
||||||
for ts_key, date_key in (
|
self._fill_common_fields(info_dict)
|
||||||
('timestamp', 'upload_date'),
|
|
||||||
('release_timestamp', 'release_date'),
|
|
||||||
):
|
|
||||||
if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
|
|
||||||
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
|
||||||
# see http://bugs.python.org/issue1646728)
|
|
||||||
try:
|
|
||||||
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
|
|
||||||
info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d'))
|
|
||||||
except (ValueError, OverflowError, OSError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Auto generate title fields corresponding to the *_number fields when missing
|
|
||||||
# in order to always have clean titles. This is very common for TV series.
|
|
||||||
for field in ('chapter', 'season', 'episode'):
|
|
||||||
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
|
||||||
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
|
||||||
|
|
||||||
for cc_kind in ('subtitles', 'automatic_captions'):
|
for cc_kind in ('subtitles', 'automatic_captions'):
|
||||||
cc = info_dict.get(cc_kind)
|
cc = info_dict.get(cc_kind)
|
||||||
@ -1916,6 +1941,7 @@ class YoutubeDL(object):
|
|||||||
if self.params.get('forcejson', False):
|
if self.params.get('forcejson', False):
|
||||||
self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
|
self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
|
||||||
|
|
||||||
|
@_catch_unsafe_file_extension
|
||||||
def process_info(self, info_dict):
|
def process_info(self, info_dict):
|
||||||
"""Process a single resolved IE result."""
|
"""Process a single resolved IE result."""
|
||||||
|
|
||||||
@ -2088,18 +2114,26 @@ class YoutubeDL(object):
|
|||||||
# TODO: Check acodec/vcodec
|
# TODO: Check acodec/vcodec
|
||||||
return False
|
return False
|
||||||
|
|
||||||
filename_real_ext = os.path.splitext(filename)[1][1:]
|
exts = [info_dict['ext']]
|
||||||
filename_wo_ext = (
|
|
||||||
os.path.splitext(filename)[0]
|
|
||||||
if filename_real_ext == info_dict['ext']
|
|
||||||
else filename)
|
|
||||||
requested_formats = info_dict['requested_formats']
|
requested_formats = info_dict['requested_formats']
|
||||||
if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
|
if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
|
||||||
info_dict['ext'] = 'mkv'
|
info_dict['ext'] = 'mkv'
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'Requested formats are incompatible for merge and will be merged into mkv.')
|
'Requested formats are incompatible for merge and will be merged into mkv.')
|
||||||
|
exts.append(info_dict['ext'])
|
||||||
|
|
||||||
# Ensure filename always has a correct extension for successful merge
|
# Ensure filename always has a correct extension for successful merge
|
||||||
filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
|
def correct_ext(filename, ext=exts[1]):
|
||||||
|
if filename == '-':
|
||||||
|
return filename
|
||||||
|
f_name, f_real_ext = os.path.splitext(filename)
|
||||||
|
f_real_ext = f_real_ext[1:]
|
||||||
|
filename_wo_ext = f_name if f_real_ext in exts else filename
|
||||||
|
if ext is None:
|
||||||
|
ext = f_real_ext or None
|
||||||
|
return join_nonempty(filename_wo_ext, ext, delim='.')
|
||||||
|
|
||||||
|
filename = correct_ext(filename)
|
||||||
if os.path.exists(encodeFilename(filename)):
|
if os.path.exists(encodeFilename(filename)):
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
'[download] %s has already been downloaded and '
|
'[download] %s has already been downloaded and '
|
||||||
@ -2109,8 +2143,9 @@ class YoutubeDL(object):
|
|||||||
new_info = dict(info_dict)
|
new_info = dict(info_dict)
|
||||||
new_info.update(f)
|
new_info.update(f)
|
||||||
fname = prepend_extension(
|
fname = prepend_extension(
|
||||||
self.prepare_filename(new_info),
|
correct_ext(
|
||||||
'f%s' % f['format_id'], new_info['ext'])
|
self.prepare_filename(new_info), new_info['ext']),
|
||||||
|
'f%s' % (f['format_id'],), new_info['ext'])
|
||||||
if not ensure_dir_exists(fname):
|
if not ensure_dir_exists(fname):
|
||||||
return
|
return
|
||||||
downloaded.append(fname)
|
downloaded.append(fname)
|
||||||
|
@ -21,6 +21,7 @@ from .compat import (
|
|||||||
workaround_optparse_bug9161,
|
workaround_optparse_bug9161,
|
||||||
)
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
|
_UnsafeExtensionError,
|
||||||
DateRange,
|
DateRange,
|
||||||
decodeOption,
|
decodeOption,
|
||||||
DEFAULT_OUTTMPL,
|
DEFAULT_OUTTMPL,
|
||||||
@ -173,6 +174,9 @@ def _real_main(argv=None):
|
|||||||
if opts.ap_mso and opts.ap_mso not in MSO_INFO:
|
if opts.ap_mso and opts.ap_mso not in MSO_INFO:
|
||||||
parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
|
parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
|
||||||
|
|
||||||
|
if opts.no_check_extensions:
|
||||||
|
_UnsafeExtensionError.lenient = True
|
||||||
|
|
||||||
def parse_retries(retries):
|
def parse_retries(retries):
|
||||||
if retries in ('inf', 'infinite'):
|
if retries in ('inf', 'infinite'):
|
||||||
parsed_retries = float('inf')
|
parsed_retries = float('inf')
|
||||||
|
@ -2719,8 +2719,222 @@ if sys.version_info < (2, 7):
|
|||||||
if isinstance(xpath, compat_str):
|
if isinstance(xpath, compat_str):
|
||||||
xpath = xpath.encode('ascii')
|
xpath = xpath.encode('ascii')
|
||||||
return xpath
|
return xpath
|
||||||
|
|
||||||
|
# further code below based on CPython 2.7 source
|
||||||
|
import functools
|
||||||
|
|
||||||
|
_xpath_tokenizer_re = re.compile(r'''(?x)
|
||||||
|
( # (1)
|
||||||
|
'[^']*'|"[^"]*"| # quoted strings, or
|
||||||
|
::|//?|\.\.|\(\)|[/.*:[\]()@=] # navigation specials
|
||||||
|
)| # or (2)
|
||||||
|
((?:\{[^}]+\})?[^/[\]()@=\s]+)| # token: optional {ns}, no specials
|
||||||
|
\s+ # or white space
|
||||||
|
''')
|
||||||
|
|
||||||
|
def _xpath_tokenizer(pattern, namespaces=None):
|
||||||
|
for token in _xpath_tokenizer_re.findall(pattern):
|
||||||
|
tag = token[1]
|
||||||
|
if tag and tag[0] != "{" and ":" in tag:
|
||||||
|
try:
|
||||||
|
if not namespaces:
|
||||||
|
raise KeyError
|
||||||
|
prefix, uri = tag.split(":", 1)
|
||||||
|
yield token[0], "{%s}%s" % (namespaces[prefix], uri)
|
||||||
|
except KeyError:
|
||||||
|
raise SyntaxError("prefix %r not found in prefix map" % prefix)
|
||||||
|
else:
|
||||||
|
yield token
|
||||||
|
|
||||||
|
def _get_parent_map(context):
|
||||||
|
parent_map = context.parent_map
|
||||||
|
if parent_map is None:
|
||||||
|
context.parent_map = parent_map = {}
|
||||||
|
for p in context.root.getiterator():
|
||||||
|
for e in p:
|
||||||
|
parent_map[e] = p
|
||||||
|
return parent_map
|
||||||
|
|
||||||
|
def _select(context, result, filter_fn=lambda *_: True):
|
||||||
|
for elem in result:
|
||||||
|
for e in elem:
|
||||||
|
if filter_fn(e, elem):
|
||||||
|
yield e
|
||||||
|
|
||||||
|
def _prepare_child(next_, token):
|
||||||
|
tag = token[1]
|
||||||
|
return functools.partial(_select, filter_fn=lambda e, _: e.tag == tag)
|
||||||
|
|
||||||
|
def _prepare_star(next_, token):
|
||||||
|
return _select
|
||||||
|
|
||||||
|
def _prepare_self(next_, token):
|
||||||
|
return lambda _, result: (e for e in result)
|
||||||
|
|
||||||
|
def _prepare_descendant(next_, token):
|
||||||
|
token = next(next_)
|
||||||
|
if token[0] == "*":
|
||||||
|
tag = "*"
|
||||||
|
elif not token[0]:
|
||||||
|
tag = token[1]
|
||||||
|
else:
|
||||||
|
raise SyntaxError("invalid descendant")
|
||||||
|
|
||||||
|
def select(context, result):
|
||||||
|
for elem in result:
|
||||||
|
for e in elem.getiterator(tag):
|
||||||
|
if e is not elem:
|
||||||
|
yield e
|
||||||
|
return select
|
||||||
|
|
||||||
|
def _prepare_parent(next_, token):
|
||||||
|
def select(context, result):
|
||||||
|
# FIXME: raise error if .. is applied at toplevel?
|
||||||
|
parent_map = _get_parent_map(context)
|
||||||
|
result_map = {}
|
||||||
|
for elem in result:
|
||||||
|
if elem in parent_map:
|
||||||
|
parent = parent_map[elem]
|
||||||
|
if parent not in result_map:
|
||||||
|
result_map[parent] = None
|
||||||
|
yield parent
|
||||||
|
return select
|
||||||
|
|
||||||
|
def _prepare_predicate(next_, token):
|
||||||
|
signature = []
|
||||||
|
predicate = []
|
||||||
|
for token in next_:
|
||||||
|
if token[0] == "]":
|
||||||
|
break
|
||||||
|
if token[0] and token[0][:1] in "'\"":
|
||||||
|
token = "'", token[0][1:-1]
|
||||||
|
signature.append(token[0] or "-")
|
||||||
|
predicate.append(token[1])
|
||||||
|
|
||||||
|
def select(context, result, filter_fn=lambda _: True):
|
||||||
|
for elem in result:
|
||||||
|
if filter_fn(elem):
|
||||||
|
yield elem
|
||||||
|
|
||||||
|
signature = "".join(signature)
|
||||||
|
# use signature to determine predicate type
|
||||||
|
if signature == "@-":
|
||||||
|
# [@attribute] predicate
|
||||||
|
key = predicate[1]
|
||||||
|
return functools.partial(
|
||||||
|
select, filter_fn=lambda el: el.get(key) is not None)
|
||||||
|
if signature == "@-='":
|
||||||
|
# [@attribute='value']
|
||||||
|
key = predicate[1]
|
||||||
|
value = predicate[-1]
|
||||||
|
return functools.partial(
|
||||||
|
select, filter_fn=lambda el: el.get(key) == value)
|
||||||
|
if signature == "-" and not re.match(r"\d+$", predicate[0]):
|
||||||
|
# [tag]
|
||||||
|
tag = predicate[0]
|
||||||
|
return functools.partial(
|
||||||
|
select, filter_fn=lambda el: el.find(tag) is not None)
|
||||||
|
if signature == "-='" and not re.match(r"\d+$", predicate[0]):
|
||||||
|
# [tag='value']
|
||||||
|
tag = predicate[0]
|
||||||
|
value = predicate[-1]
|
||||||
|
|
||||||
|
def itertext(el):
|
||||||
|
for e in el.getiterator():
|
||||||
|
e = e.text
|
||||||
|
if e:
|
||||||
|
yield e
|
||||||
|
|
||||||
|
def select(context, result):
|
||||||
|
for elem in result:
|
||||||
|
for e in elem.findall(tag):
|
||||||
|
if "".join(itertext(e)) == value:
|
||||||
|
yield elem
|
||||||
|
break
|
||||||
|
return select
|
||||||
|
if signature == "-" or signature == "-()" or signature == "-()-":
|
||||||
|
# [index] or [last()] or [last()-index]
|
||||||
|
if signature == "-":
|
||||||
|
index = int(predicate[0]) - 1
|
||||||
|
else:
|
||||||
|
if predicate[0] != "last":
|
||||||
|
raise SyntaxError("unsupported function")
|
||||||
|
if signature == "-()-":
|
||||||
|
try:
|
||||||
|
index = int(predicate[2]) - 1
|
||||||
|
except ValueError:
|
||||||
|
raise SyntaxError("unsupported expression")
|
||||||
|
else:
|
||||||
|
index = -1
|
||||||
|
|
||||||
|
def select(context, result):
|
||||||
|
parent_map = _get_parent_map(context)
|
||||||
|
for elem in result:
|
||||||
|
try:
|
||||||
|
parent = parent_map[elem]
|
||||||
|
# FIXME: what if the selector is "*" ?
|
||||||
|
elems = list(parent.findall(elem.tag))
|
||||||
|
if elems[index] is elem:
|
||||||
|
yield elem
|
||||||
|
except (IndexError, KeyError):
|
||||||
|
pass
|
||||||
|
return select
|
||||||
|
raise SyntaxError("invalid predicate")
|
||||||
|
|
||||||
|
ops = {
|
||||||
|
"": _prepare_child,
|
||||||
|
"*": _prepare_star,
|
||||||
|
".": _prepare_self,
|
||||||
|
"..": _prepare_parent,
|
||||||
|
"//": _prepare_descendant,
|
||||||
|
"[": _prepare_predicate,
|
||||||
|
}
|
||||||
|
|
||||||
|
_cache = {}
|
||||||
|
|
||||||
|
class _SelectorContext:
|
||||||
|
parent_map = None
|
||||||
|
|
||||||
|
def __init__(self, root):
|
||||||
|
self.root = root
|
||||||
|
|
||||||
|
##
|
||||||
|
# Generate all matching objects.
|
||||||
|
|
||||||
|
def compat_etree_iterfind(elem, path, namespaces=None):
|
||||||
|
# compile selector pattern
|
||||||
|
if path[-1:] == "/":
|
||||||
|
path = path + "*" # implicit all (FIXME: keep this?)
|
||||||
|
try:
|
||||||
|
selector = _cache[path]
|
||||||
|
except KeyError:
|
||||||
|
if len(_cache) > 100:
|
||||||
|
_cache.clear()
|
||||||
|
if path[:1] == "/":
|
||||||
|
raise SyntaxError("cannot use absolute path on element")
|
||||||
|
tokens = _xpath_tokenizer(path, namespaces)
|
||||||
|
selector = []
|
||||||
|
for token in tokens:
|
||||||
|
if token[0] == "/":
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
selector.append(ops[token[0]](tokens, token))
|
||||||
|
except StopIteration:
|
||||||
|
raise SyntaxError("invalid path")
|
||||||
|
_cache[path] = selector
|
||||||
|
# execute selector pattern
|
||||||
|
result = [elem]
|
||||||
|
context = _SelectorContext(elem)
|
||||||
|
for select in selector:
|
||||||
|
result = select(context, result)
|
||||||
|
return result
|
||||||
|
|
||||||
|
# end of code based on CPython 2.7 source
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
compat_xpath = lambda xpath: xpath
|
compat_xpath = lambda xpath: xpath
|
||||||
|
compat_etree_iterfind = lambda element, match: element.iterfind(match)
|
||||||
|
|
||||||
|
|
||||||
compat_os_name = os._name if os.name == 'java' else os.name
|
compat_os_name = os._name if os.name == 'java' else os.name
|
||||||
@ -2756,7 +2970,7 @@ except (AssertionError, UnicodeEncodeError):
|
|||||||
|
|
||||||
|
|
||||||
def compat_ord(c):
|
def compat_ord(c):
|
||||||
if type(c) is int:
|
if isinstance(c, int):
|
||||||
return c
|
return c
|
||||||
else:
|
else:
|
||||||
return ord(c)
|
return ord(c)
|
||||||
@ -2955,7 +3169,7 @@ except ImportError:
|
|||||||
return self
|
return self
|
||||||
|
|
||||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||||
return exc_val is not None and isinstance(exc_val, self._exceptions or tuple())
|
return exc_type is not None and issubclass(exc_type, self._exceptions or tuple())
|
||||||
|
|
||||||
|
|
||||||
# subprocess.Popen context manager
|
# subprocess.Popen context manager
|
||||||
@ -3308,6 +3522,7 @@ __all__ = [
|
|||||||
'compat_contextlib_suppress',
|
'compat_contextlib_suppress',
|
||||||
'compat_ctypes_WINFUNCTYPE',
|
'compat_ctypes_WINFUNCTYPE',
|
||||||
'compat_etree_fromstring',
|
'compat_etree_fromstring',
|
||||||
|
'compat_etree_iterfind',
|
||||||
'compat_filter',
|
'compat_filter',
|
||||||
'compat_get_terminal_size',
|
'compat_get_terminal_size',
|
||||||
'compat_getenv',
|
'compat_getenv',
|
||||||
|
@ -1169,10 +1169,10 @@ class InfoExtractor(object):
|
|||||||
def _get_netrc_login_info(self, netrc_machine=None):
|
def _get_netrc_login_info(self, netrc_machine=None):
|
||||||
username = None
|
username = None
|
||||||
password = None
|
password = None
|
||||||
netrc_machine = netrc_machine or self._NETRC_MACHINE
|
|
||||||
|
|
||||||
if self._downloader.params.get('usenetrc', False):
|
if self._downloader.params.get('usenetrc', False):
|
||||||
try:
|
try:
|
||||||
|
netrc_machine = netrc_machine or self._NETRC_MACHINE
|
||||||
info = netrc.netrc().authenticators(netrc_machine)
|
info = netrc.netrc().authenticators(netrc_machine)
|
||||||
if info is not None:
|
if info is not None:
|
||||||
username = info[0]
|
username = info[0]
|
||||||
@ -1180,7 +1180,7 @@ class InfoExtractor(object):
|
|||||||
else:
|
else:
|
||||||
raise netrc.NetrcParseError(
|
raise netrc.NetrcParseError(
|
||||||
'No authenticators for %s' % netrc_machine)
|
'No authenticators for %s' % netrc_machine)
|
||||||
except (IOError, netrc.NetrcParseError) as err:
|
except (AttributeError, IOError, netrc.NetrcParseError) as err:
|
||||||
self._downloader.report_warning(
|
self._downloader.report_warning(
|
||||||
'parsing .netrc: %s' % error_to_compat_str(err))
|
'parsing .netrc: %s' % error_to_compat_str(err))
|
||||||
|
|
||||||
@ -1490,14 +1490,18 @@ class InfoExtractor(object):
|
|||||||
return dict((k, v) for k, v in info.items() if v is not None)
|
return dict((k, v) for k, v in info.items() if v is not None)
|
||||||
|
|
||||||
def _search_nextjs_data(self, webpage, video_id, **kw):
|
def _search_nextjs_data(self, webpage, video_id, **kw):
|
||||||
nkw = dict((k, v) for k, v in kw.items() if k in ('transform_source', 'fatal'))
|
# ..., *, transform_source=None, fatal=True, default=NO_DEFAULT
|
||||||
kw.pop('transform_source', None)
|
|
||||||
next_data = self._search_regex(
|
# TODO: remove this backward compat
|
||||||
r'''<script[^>]+\bid\s*=\s*('|")__NEXT_DATA__\1[^>]*>(?P<nd>[^<]+)</script>''',
|
default = kw.get('default', NO_DEFAULT)
|
||||||
webpage, 'next.js data', group='nd', **kw)
|
if default == '{}':
|
||||||
if not next_data:
|
kw['default'] = {}
|
||||||
return {}
|
kw = compat_kwargs(kw)
|
||||||
return self._parse_json(next_data, video_id, **nkw)
|
|
||||||
|
return self._search_json(
|
||||||
|
r'''<script\s[^>]*?\bid\s*=\s*('|")__NEXT_DATA__\1[^>]*>''',
|
||||||
|
webpage, 'next.js data', video_id, end_pattern='</script>',
|
||||||
|
**kw)
|
||||||
|
|
||||||
def _search_nuxt_data(self, webpage, video_id, *args, **kwargs):
|
def _search_nuxt_data(self, webpage, video_id, *args, **kwargs):
|
||||||
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
|
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
|
||||||
@ -3029,7 +3033,6 @@ class InfoExtractor(object):
|
|||||||
transform_source=transform_source, default=None)
|
transform_source=transform_source, default=None)
|
||||||
|
|
||||||
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
||||||
|
|
||||||
# allow passing `transform_source` through to _find_jwplayer_data()
|
# allow passing `transform_source` through to _find_jwplayer_data()
|
||||||
transform_source = kwargs.pop('transform_source', None)
|
transform_source = kwargs.pop('transform_source', None)
|
||||||
kwfind = compat_kwargs({'transform_source': transform_source}) if transform_source else {}
|
kwfind = compat_kwargs({'transform_source': transform_source}) if transform_source else {}
|
||||||
@ -3296,12 +3299,16 @@ class InfoExtractor(object):
|
|||||||
return ret
|
return ret
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
|
def _merge_subtitles(cls, subtitle_dict1, *subtitle_dicts, **kwargs):
|
||||||
""" Merge two subtitle dictionaries, language by language. """
|
""" Merge subtitle dictionaries, language by language. """
|
||||||
ret = dict(subtitle_dict1)
|
|
||||||
for lang in subtitle_dict2:
|
# ..., * , target=None
|
||||||
ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
|
target = kwargs.get('target') or dict(subtitle_dict1)
|
||||||
return ret
|
|
||||||
|
for subtitle_dict in subtitle_dicts:
|
||||||
|
for lang in subtitle_dict:
|
||||||
|
target[lang] = cls._merge_subtitle_items(target.get(lang, []), subtitle_dict[lang])
|
||||||
|
return target
|
||||||
|
|
||||||
def extract_automatic_captions(self, *args, **kwargs):
|
def extract_automatic_captions(self, *args, **kwargs):
|
||||||
if (self._downloader.params.get('writeautomaticsub', False)
|
if (self._downloader.params.get('writeautomaticsub', False)
|
||||||
@ -3334,6 +3341,29 @@ class InfoExtractor(object):
|
|||||||
def _generic_title(self, url):
|
def _generic_title(self, url):
|
||||||
return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
|
return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
|
||||||
|
|
||||||
|
def _yes_playlist(self, playlist_id, video_id, *args, **kwargs):
|
||||||
|
# smuggled_data=None, *, playlist_label='playlist', video_label='video'
|
||||||
|
smuggled_data = args[0] if len(args) == 1 else kwargs.get('smuggled_data')
|
||||||
|
playlist_label = kwargs.get('playlist_label', 'playlist')
|
||||||
|
video_label = kwargs.get('video_label', 'video')
|
||||||
|
|
||||||
|
if not playlist_id or not video_id:
|
||||||
|
return not video_id
|
||||||
|
|
||||||
|
no_playlist = (smuggled_data or {}).get('force_noplaylist')
|
||||||
|
if no_playlist is not None:
|
||||||
|
return not no_playlist
|
||||||
|
|
||||||
|
video_id = '' if video_id is True else ' ' + video_id
|
||||||
|
noplaylist = self.get_param('noplaylist')
|
||||||
|
self.to_screen(
|
||||||
|
'Downloading just the {0}{1} because of --no-playlist'.format(video_label, video_id)
|
||||||
|
if noplaylist else
|
||||||
|
'Downloading {0}{1} - add --no-playlist to download just the {2}{3}'.format(
|
||||||
|
playlist_label, '' if playlist_id is True else ' ' + playlist_id,
|
||||||
|
video_label, video_id))
|
||||||
|
return not noplaylist
|
||||||
|
|
||||||
|
|
||||||
class SearchInfoExtractor(InfoExtractor):
|
class SearchInfoExtractor(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
|
@ -898,21 +898,13 @@ from .ooyala import (
|
|||||||
)
|
)
|
||||||
from .ora import OraTVIE
|
from .ora import OraTVIE
|
||||||
from .orf import (
|
from .orf import (
|
||||||
ORFTVthekIE,
|
ORFONIE,
|
||||||
ORFFM4IE,
|
ORFONLiveIE,
|
||||||
ORFFM4StoryIE,
|
ORFFM4StoryIE,
|
||||||
ORFOE1IE,
|
|
||||||
ORFOE3IE,
|
|
||||||
ORFNOEIE,
|
|
||||||
ORFWIEIE,
|
|
||||||
ORFBGLIE,
|
|
||||||
ORFOOEIE,
|
|
||||||
ORFSTMIE,
|
|
||||||
ORFKTNIE,
|
|
||||||
ORFSBGIE,
|
|
||||||
ORFTIRIE,
|
|
||||||
ORFVBGIE,
|
|
||||||
ORFIPTVIE,
|
ORFIPTVIE,
|
||||||
|
ORFPodcastIE,
|
||||||
|
ORFRadioIE,
|
||||||
|
ORFRadioCollectionIE,
|
||||||
)
|
)
|
||||||
from .outsidetv import OutsideTVIE
|
from .outsidetv import OutsideTVIE
|
||||||
from .packtpub import (
|
from .packtpub import (
|
||||||
@ -1653,7 +1645,15 @@ from .younow import (
|
|||||||
YouNowChannelIE,
|
YouNowChannelIE,
|
||||||
YouNowMomentIE,
|
YouNowMomentIE,
|
||||||
)
|
)
|
||||||
from .youporn import YouPornIE
|
from .youporn import (
|
||||||
|
YouPornIE,
|
||||||
|
YouPornCategoryIE,
|
||||||
|
YouPornChannelIE,
|
||||||
|
YouPornCollectionIE,
|
||||||
|
YouPornStarIE,
|
||||||
|
YouPornTagIE,
|
||||||
|
YouPornVideosIE,
|
||||||
|
)
|
||||||
from .yourporn import YourPornIE
|
from .yourporn import YourPornIE
|
||||||
from .yourupload import YourUploadIE
|
from .yourupload import YourUploadIE
|
||||||
from .youtube import (
|
from .youtube import (
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
@ -10,7 +11,7 @@ from ..compat import (
|
|||||||
compat_ord,
|
compat_ord,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_zip
|
compat_zip as zip,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@ -24,7 +25,7 @@ class MixcloudBaseIE(InfoExtractor):
|
|||||||
def _call_api(self, object_type, object_fields, display_id, username, slug=None):
|
def _call_api(self, object_type, object_fields, display_id, username, slug=None):
|
||||||
lookup_key = object_type + 'Lookup'
|
lookup_key = object_type + 'Lookup'
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
'https://www.mixcloud.com/graphql', display_id, query={
|
'https://app.mixcloud.com/graphql', display_id, query={
|
||||||
'query': '''{
|
'query': '''{
|
||||||
%s(lookup: {username: "%s"%s}) {
|
%s(lookup: {username: "%s"%s}) {
|
||||||
%s
|
%s
|
||||||
@ -44,7 +45,7 @@ class MixcloudIE(MixcloudBaseIE):
|
|||||||
'ext': 'm4a',
|
'ext': 'm4a',
|
||||||
'title': 'Cryptkeeper',
|
'title': 'Cryptkeeper',
|
||||||
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
||||||
'uploader': 'Daniel Holbach',
|
'uploader': 'dholbach', # was: 'Daniel Holbach',
|
||||||
'uploader_id': 'dholbach',
|
'uploader_id': 'dholbach',
|
||||||
'thumbnail': r're:https?://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
@ -57,7 +58,7 @@ class MixcloudIE(MixcloudBaseIE):
|
|||||||
'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat',
|
'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Caribou 7 inch Vinyl Mix & Chat',
|
'title': 'Caribou 7 inch Vinyl Mix & Chat',
|
||||||
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
|
'description': r're:Last week Dan Snaith aka Caribou swung by the Brownswood.{136}',
|
||||||
'uploader': 'Gilles Peterson Worldwide',
|
'uploader': 'Gilles Peterson Worldwide',
|
||||||
'uploader_id': 'gillespeterson',
|
'uploader_id': 'gillespeterson',
|
||||||
'thumbnail': 're:https?://.*',
|
'thumbnail': 're:https?://.*',
|
||||||
@ -65,6 +66,23 @@ class MixcloudIE(MixcloudBaseIE):
|
|||||||
'timestamp': 1422987057,
|
'timestamp': 1422987057,
|
||||||
'upload_date': '20150203',
|
'upload_date': '20150203',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': '404 not found',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.mixcloud.com/gillespeterson/carnival-m%C3%BAsica-popular-brasileira-mix/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'gillespeterson_carnival-música-popular-brasileira-mix',
|
||||||
|
'ext': 'm4a',
|
||||||
|
'title': 'Carnival Música Popular Brasileira Mix',
|
||||||
|
'description': r're:Gilles was recently in Brazil to play at Boiler Room.{208}',
|
||||||
|
'timestamp': 1454347174,
|
||||||
|
'upload_date': '20160201',
|
||||||
|
'uploader': 'Gilles Peterson Worldwide',
|
||||||
|
'uploader_id': 'gillespeterson',
|
||||||
|
'thumbnail': 're:https?://.*',
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
|
'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -76,10 +94,10 @@ class MixcloudIE(MixcloudBaseIE):
|
|||||||
"""Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
|
"""Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
|
||||||
return ''.join([
|
return ''.join([
|
||||||
compat_chr(compat_ord(ch) ^ compat_ord(k))
|
compat_chr(compat_ord(ch) ^ compat_ord(k))
|
||||||
for ch, k in compat_zip(ciphertext, itertools.cycle(key))])
|
for ch, k in zip(ciphertext, itertools.cycle(key))])
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
username, slug = re.match(self._VALID_URL, url).groups()
|
username, slug = self._match_valid_url(url).groups()
|
||||||
username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
|
username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
|
||||||
track_id = '%s_%s' % (username, slug)
|
track_id = '%s_%s' % (username, slug)
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -8,7 +8,7 @@ from ..compat import compat_str
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
try_get,
|
traverse_obj,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -109,7 +109,7 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
|
|||||||
}
|
}
|
||||||
name'''
|
name'''
|
||||||
|
|
||||||
@ classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
|
return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
|
||||||
|
|
||||||
@ -118,7 +118,8 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
|
|||||||
artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
|
artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
|
||||||
|
|
||||||
def entries():
|
def entries():
|
||||||
for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []):
|
for music in traverse_obj(artist, (
|
||||||
|
'musics', 'nodes', lambda _, m: m['musicID'])):
|
||||||
yield self._parse_music(music)
|
yield self._parse_music(music)
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
@ -137,7 +138,7 @@ class PalcoMP3VideoIE(PalcoMP3BaseIE):
|
|||||||
'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
|
'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
|
||||||
'description': 'md5:7043342c09a224598e93546e98e49282',
|
'description': 'md5:7043342c09a224598e93546e98e49282',
|
||||||
'upload_date': '20161107',
|
'upload_date': '20161107',
|
||||||
'uploader_id': 'maiaramaraisaoficial',
|
'uploader_id': '@maiaramaraisaoficial',
|
||||||
'uploader': 'Maiara e Maraisa',
|
'uploader': 'Maiara e Maraisa',
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
@ -11,6 +12,7 @@ from ..utils import (
|
|||||||
strip_or_none,
|
strip_or_none,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
urljoin,
|
urljoin,
|
||||||
|
str_to_int,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -35,6 +37,26 @@ class VidLiiIE(InfoExtractor):
|
|||||||
'categories': ['News & Politics'],
|
'categories': ['News & Politics'],
|
||||||
'tags': ['Vidlii', 'Jan', 'Videogames'],
|
'tags': ['Vidlii', 'Jan', 'Videogames'],
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# HD
|
||||||
|
'url': 'https://www.vidlii.com/watch?v=2Ng8Abj2Fkl',
|
||||||
|
'md5': '450e7da379c884788c3a4fa02a3ce1a4',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2Ng8Abj2Fkl',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'test',
|
||||||
|
'description': 'md5:cc55a86032a7b6b3cbfd0f6b155b52e9',
|
||||||
|
'thumbnail': 'https://www.vidlii.com/usfi/thmp/2Ng8Abj2Fkl.jpg',
|
||||||
|
'uploader': 'VidLii',
|
||||||
|
'uploader_url': 'https://www.vidlii.com/user/VidLii',
|
||||||
|
'upload_date': '20200927',
|
||||||
|
'duration': 5,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'average_rating': float,
|
||||||
|
'categories': ['Film & Animation'],
|
||||||
|
'tags': list,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.vidlii.com/embed?v=tJluaH4BJ3v&a=0',
|
'url': 'https://www.vidlii.com/embed?v=tJluaH4BJ3v&a=0',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -46,11 +68,32 @@ class VidLiiIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'https://www.vidlii.com/watch?v=%s' % video_id, video_id)
|
'https://www.vidlii.com/watch?v=%s' % video_id, video_id)
|
||||||
|
|
||||||
video_url = self._search_regex(
|
formats = []
|
||||||
r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1', webpage,
|
|
||||||
'video url', group='url')
|
|
||||||
|
|
||||||
title = self._search_regex(
|
def add_format(format_url, height=None):
|
||||||
|
height = int(self._search_regex(r'(\d+)\.mp4',
|
||||||
|
format_url, 'height', default=360))
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': '%dp' % height if height else None,
|
||||||
|
'height': height,
|
||||||
|
})
|
||||||
|
|
||||||
|
sources = re.findall(
|
||||||
|
r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1',
|
||||||
|
webpage)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
if len(sources) > 1:
|
||||||
|
add_format(sources[1][1])
|
||||||
|
self._check_formats(formats, video_id)
|
||||||
|
if len(sources) > 0:
|
||||||
|
add_format(sources[0][1])
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = self._html_search_regex(
|
||||||
(r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage,
|
(r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage,
|
||||||
'title')
|
'title')
|
||||||
|
|
||||||
@ -82,9 +125,9 @@ class VidLiiIE(InfoExtractor):
|
|||||||
default=None) or self._search_regex(
|
default=None) or self._search_regex(
|
||||||
r'duration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
|
r'duration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
view_count = int_or_none(self._search_regex(
|
view_count = str_to_int(self._html_search_regex(
|
||||||
(r'<strong>(\d+)</strong> views',
|
(r'<strong>([\d,.]+)</strong> views',
|
||||||
r'Views\s*:\s*<strong>(\d+)</strong>'),
|
r'Views\s*:\s*<strong>([\d,.]+)</strong>'),
|
||||||
webpage, 'view count', fatal=False))
|
webpage, 'view count', fatal=False))
|
||||||
|
|
||||||
comment_count = int_or_none(self._search_regex(
|
comment_count = int_or_none(self._search_regex(
|
||||||
@ -109,7 +152,7 @@ class VidLiiIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'formats': formats,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
@ -106,6 +106,25 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://music.yandex.com/album/540508/track/4878838',
|
'url': 'http://music.yandex.com/album/540508/track/4878838',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://music.yandex.ru/album/16302456/track/85430762',
|
||||||
|
'md5': '11b8d50ab03b57738deeaadf661a0a48',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '85430762',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'abr': 128,
|
||||||
|
'title': 'Haddadi Von Engst, Phonic Youth, Super Flu - Til The End (Super Flu Remix)',
|
||||||
|
'filesize': int,
|
||||||
|
'duration': 431.14,
|
||||||
|
'track': 'Til The End (Super Flu Remix)',
|
||||||
|
'album': 'Til The End',
|
||||||
|
'album_artist': 'Haddadi Von Engst, Phonic Youth',
|
||||||
|
'artist': 'Haddadi Von Engst, Phonic Youth, Super Flu',
|
||||||
|
'release_year': 2021,
|
||||||
|
'genre': 'house',
|
||||||
|
'disc_number': 1,
|
||||||
|
'track_number': 2,
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -116,10 +135,14 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
|
|||||||
'track', tld, url, track_id, 'Downloading track JSON',
|
'track', tld, url, track_id, 'Downloading track JSON',
|
||||||
{'track': '%s:%s' % (track_id, album_id)})['track']
|
{'track': '%s:%s' % (track_id, album_id)})['track']
|
||||||
track_title = track['title']
|
track_title = track['title']
|
||||||
|
track_version = track.get('version')
|
||||||
|
if track_version:
|
||||||
|
track_title = '%s (%s)' % (track_title, track_version)
|
||||||
|
|
||||||
download_data = self._download_json(
|
download_data = self._download_json(
|
||||||
'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
|
'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
|
||||||
track_id, 'Downloading track location url JSON',
|
track_id, 'Downloading track location url JSON',
|
||||||
|
query={'hq': 1},
|
||||||
headers={'X-Retpath-Y': url})
|
headers={'X-Retpath-Y': url})
|
||||||
|
|
||||||
fd_data = self._download_json(
|
fd_data = self._download_json(
|
||||||
|
@ -1,20 +1,38 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import itertools
|
||||||
import re
|
import re
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
|
ExtractorError,
|
||||||
|
get_element_by_class,
|
||||||
|
get_element_by_id,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
str_to_int,
|
merge_dicts,
|
||||||
|
parse_count,
|
||||||
|
parse_qs,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class YouPornIE(InfoExtractor):
|
class YouPornIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
|
_VALID_URL = (
|
||||||
|
r'youporn:(?P<id>\d+)',
|
||||||
|
r'''(?x)
|
||||||
|
https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)
|
||||||
|
(?:/(?:(?P<display_id>[^/?#&]+)/?)?)?(?:[#?]|$)
|
||||||
|
'''
|
||||||
|
)
|
||||||
|
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
||||||
'md5': '3744d24c50438cf5b6f6d59feb5055c2',
|
'md5': '3744d24c50438cf5b6f6d59feb5055c2',
|
||||||
@ -34,7 +52,7 @@ class YouPornIE(InfoExtractor):
|
|||||||
'tags': list,
|
'tags': list,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
'skip': 'This video has been disabled',
|
'skip': 'This video has been deactivated',
|
||||||
}, {
|
}, {
|
||||||
# Unknown uploader
|
# Unknown uploader
|
||||||
'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
|
'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
|
||||||
@ -66,57 +84,104 @@ class YouPornIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
|
'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.youporn.com/watch/16290308/tinderspecial-trailer1/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '16290308',
|
||||||
|
'age_limit': 18,
|
||||||
|
'categories': [],
|
||||||
|
'description': None, # SEO spam using title removed
|
||||||
|
'display_id': 'tinderspecial-trailer1',
|
||||||
|
'duration': 298.0,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20201123',
|
||||||
|
'uploader': 'Ersties',
|
||||||
|
'tags': [],
|
||||||
|
'thumbnail': 'https://fi1.ypncdn.com/m=eaSaaTbWx/202011/23/16290308/original/3.jpg',
|
||||||
|
'timestamp': 1606147564,
|
||||||
|
'title': 'Tinder In Real Life',
|
||||||
|
'view_count': int,
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@classmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(cls, webpage):
|
||||||
return re.findall(
|
def yield_urls():
|
||||||
r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)',
|
for p in cls._EMBED_REGEX:
|
||||||
webpage)
|
for m in re.finditer(p, webpage):
|
||||||
|
yield m.group('url')
|
||||||
|
|
||||||
|
return list(yield_urls())
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
# A different video ID (data-video-id) is hidden in the page but
|
||||||
video_id = mobj.group('id')
|
# never seems to be used
|
||||||
display_id = mobj.group('display_id') or video_id
|
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
|
||||||
|
url = 'http://www.youporn.com/watch/%s' % (video_id,)
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
url, video_id, headers={'Cookie': 'age_verified=1'})
|
||||||
|
|
||||||
definitions = self._download_json(
|
watchable = self._search_regex(
|
||||||
'https://www.youporn.com/api/video/media_definitions/%s/' % video_id,
|
r'''(<div\s[^>]*\bid\s*=\s*('|")?watch-container(?(2)\2|(?!-)\b)[^>]*>)''',
|
||||||
display_id)
|
webpage, 'watchability', default=None)
|
||||||
|
if not watchable:
|
||||||
|
msg = re.split(r'\s{4}', clean_html(get_element_by_id(
|
||||||
|
'mainContent', webpage)) or '')[0]
|
||||||
|
raise ExtractorError(
|
||||||
|
('%s says: %s' % (self.IE_NAME, msg))
|
||||||
|
if msg else 'Video unavailable: no reason found',
|
||||||
|
expected=True)
|
||||||
|
# internal ID ?
|
||||||
|
# video_id = extract_attributes(watchable).get('data-video-id')
|
||||||
|
|
||||||
|
playervars = self._search_json(
|
||||||
|
r'\bplayervars\s*:', webpage, 'playervars', video_id)
|
||||||
|
|
||||||
|
def get_fmt(x):
|
||||||
|
v_url = url_or_none(x.get('videoUrl'))
|
||||||
|
if v_url:
|
||||||
|
x['videoUrl'] = v_url
|
||||||
|
return (x['format'], x)
|
||||||
|
|
||||||
|
defs_by_format = dict(traverse_obj(playervars, (
|
||||||
|
'mediaDefinitions', lambda _, v: v.get('format'), T(get_fmt))))
|
||||||
|
|
||||||
|
def get_format_data(f):
|
||||||
|
if f not in defs_by_format:
|
||||||
|
return []
|
||||||
|
return self._download_json(
|
||||||
|
defs_by_format[f]['videoUrl'], video_id, '{0}-formats'.format(f))
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for definition in definitions:
|
# Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
|
||||||
if not isinstance(definition, dict):
|
for hls_url in traverse_obj(
|
||||||
continue
|
get_format_data('hls'),
|
||||||
video_url = url_or_none(definition.get('videoUrl'))
|
(lambda _, v: not isinstance(v['defaultQuality'], bool), 'videoUrl'),
|
||||||
if not video_url:
|
(Ellipsis, 'videoUrl')):
|
||||||
continue
|
formats.extend(self._extract_m3u8_formats(
|
||||||
f = {
|
hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls',
|
||||||
'url': video_url,
|
entry_protocol='m3u8_native'))
|
||||||
'filesize': int_or_none(definition.get('videoSize')),
|
|
||||||
}
|
for f in traverse_obj(get_format_data('mp4'), (
|
||||||
height = int_or_none(definition.get('quality'))
|
lambda _, v: v.get('videoUrl'), {
|
||||||
|
'url': ('videoUrl', T(url_or_none)),
|
||||||
|
'filesize': ('videoSize', T(int_or_none)),
|
||||||
|
'height': ('quality', T(int_or_none)),
|
||||||
|
}, T(lambda x: x.get('videoUrl') and x))):
|
||||||
# Video URL's path looks like this:
|
# Video URL's path looks like this:
|
||||||
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
||||||
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
||||||
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4
|
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4
|
||||||
# We will benefit from it by extracting some metadata
|
# We will benefit from it by extracting some metadata
|
||||||
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
|
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', f['videoUrl'])
|
||||||
if mobj:
|
if mobj:
|
||||||
if not height:
|
if not f.get('height'):
|
||||||
height = int(mobj.group('height'))
|
f['height'] = int(mobj.group('height'))
|
||||||
bitrate = int(mobj.group('bitrate'))
|
f['tbr'] = int(mobj.group('bitrate'))
|
||||||
f.update({
|
f['format_id'] = '%dp-%dk' % (f['height'], f['tbr'])
|
||||||
'format_id': '%dp-%dk' % (height, bitrate),
|
|
||||||
'tbr': bitrate,
|
|
||||||
})
|
|
||||||
f['height'] = height
|
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
|
||||||
'http://www.youporn.com/watch/%s' % video_id, display_id,
|
|
||||||
headers={'Cookie': 'age_verified=1'})
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
|
r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
|
||||||
webpage, 'title', default=None) or self._og_search_title(
|
webpage, 'title', default=None) or self._og_search_title(
|
||||||
@ -131,8 +196,10 @@ class YouPornIE(InfoExtractor):
|
|||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
|
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
|
||||||
webpage, 'thumbnail', fatal=False, group='thumbnail')
|
webpage, 'thumbnail', fatal=False, group='thumbnail')
|
||||||
duration = int_or_none(self._html_search_meta(
|
duration = traverse_obj(playervars, ('duration', T(int_or_none)))
|
||||||
'video:duration', webpage, 'duration', fatal=False))
|
if duration is None:
|
||||||
|
duration = int_or_none(self._html_search_meta(
|
||||||
|
'video:duration', webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
|
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
|
||||||
@ -148,11 +215,11 @@ class YouPornIE(InfoExtractor):
|
|||||||
|
|
||||||
view_count = None
|
view_count = None
|
||||||
views = self._search_regex(
|
views = self._search_regex(
|
||||||
r'(<div[^>]+\bclass=["\']js_videoInfoViews["\']>)', webpage,
|
r'(<div\s[^>]*\bdata-value\s*=[^>]+>)\s*<label>Views:</label>',
|
||||||
'views', default=None)
|
webpage, 'views', default=None)
|
||||||
if views:
|
if views:
|
||||||
view_count = str_to_int(extract_attributes(views).get('data-value'))
|
view_count = parse_count(extract_attributes(views).get('data-value'))
|
||||||
comment_count = str_to_int(self._search_regex(
|
comment_count = parse_count(self._search_regex(
|
||||||
r'>All [Cc]omments? \(([\d,.]+)\)',
|
r'>All [Cc]omments? \(([\d,.]+)\)',
|
||||||
webpage, 'comment count', default=None))
|
webpage, 'comment count', default=None))
|
||||||
|
|
||||||
@ -168,7 +235,10 @@ class YouPornIE(InfoExtractor):
|
|||||||
r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>',
|
r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>',
|
||||||
'tags')
|
'tags')
|
||||||
|
|
||||||
return {
|
data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False) or {}
|
||||||
|
data.pop('url', None)
|
||||||
|
|
||||||
|
result = merge_dicts(data, {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
@ -183,4 +253,442 @@ class YouPornIE(InfoExtractor):
|
|||||||
'tags': tags,
|
'tags': tags,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
})
|
||||||
|
# Remove promotional non-description
|
||||||
|
if result.get('description', '').startswith(
|
||||||
|
'Watch %s online' % (result['title'],)):
|
||||||
|
del result['description']
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
class YouPornListBase(InfoExtractor):
|
||||||
|
# pattern in '.title-text' element of page section containing videos
|
||||||
|
_PLAYLIST_TITLEBAR_RE = r'\s+[Vv]ideos\s*$'
|
||||||
|
_PAGE_RETRY_COUNT = 0 # ie, no retry
|
||||||
|
_PAGE_RETRY_DELAY = 2 # seconds
|
||||||
|
|
||||||
|
def _get_next_url(self, url, pl_id, html):
|
||||||
|
return urljoin(url, self._search_regex(
|
||||||
|
r'''<a\s[^>]*?\bhref\s*=\s*("|')(?P<url>(?:(?!\1)[^>])+)\1''',
|
||||||
|
get_element_by_id('next', html) or '', 'next page',
|
||||||
|
group='url', default=None))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _get_title_from_slug(cls, title_slug):
|
||||||
|
return re.sub(r'[_-]', ' ', title_slug)
|
||||||
|
|
||||||
|
def _entries(self, url, pl_id, html=None, page_num=None):
|
||||||
|
|
||||||
|
# separates page sections
|
||||||
|
PLAYLIST_SECTION_RE = (
|
||||||
|
r'''<div\s[^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?title-bar(?:\s+[\w$-]+|\s)*\1[^>]*>'''
|
||||||
|
)
|
||||||
|
# contains video link
|
||||||
|
VIDEO_URL_RE = r'''(?x)
|
||||||
|
<div\s[^>]*\bdata-video-id\s*=\s*('|")\d+\1[^>]*>\s*
|
||||||
|
(?:<div\b[\s\S]+?</div>\s*)*
|
||||||
|
<a\s[^>]*\bhref\s*=\s*('|")(?P<url>(?:(?!\2)[^>])+)\2
|
||||||
|
'''
|
||||||
|
|
||||||
|
def yield_pages(url, html=html, page_num=page_num):
|
||||||
|
fatal = not html
|
||||||
|
for pnum in itertools.count(start=page_num or 1):
|
||||||
|
if not html:
|
||||||
|
html = self._download_webpage(
|
||||||
|
url, pl_id, note='Downloading page %d' % pnum,
|
||||||
|
fatal=fatal)
|
||||||
|
if not html:
|
||||||
|
break
|
||||||
|
fatal = False
|
||||||
|
yield (url, html, pnum)
|
||||||
|
# explicit page: extract just that page
|
||||||
|
if page_num is not None:
|
||||||
|
break
|
||||||
|
next_url = self._get_next_url(url, pl_id, html)
|
||||||
|
if not next_url or next_url == url:
|
||||||
|
break
|
||||||
|
url, html = next_url, None
|
||||||
|
|
||||||
|
def retry_page(msg, tries_left, page_data):
|
||||||
|
if tries_left <= 0:
|
||||||
|
return
|
||||||
|
self.report_warning(msg, pl_id)
|
||||||
|
sleep(self._PAGE_RETRY_DELAY)
|
||||||
|
return next(
|
||||||
|
yield_pages(page_data[0], page_num=page_data[2]), None)
|
||||||
|
|
||||||
|
def yield_entries(html):
|
||||||
|
for frag in re.split(PLAYLIST_SECTION_RE, html):
|
||||||
|
if not frag:
|
||||||
|
continue
|
||||||
|
t_text = get_element_by_class('title-text', frag or '')
|
||||||
|
if not (t_text and re.search(self._PLAYLIST_TITLEBAR_RE, t_text)):
|
||||||
|
continue
|
||||||
|
for m in re.finditer(VIDEO_URL_RE, frag):
|
||||||
|
video_url = urljoin(url, m.group('url'))
|
||||||
|
if video_url:
|
||||||
|
yield self.url_result(video_url)
|
||||||
|
|
||||||
|
last_first_url = None
|
||||||
|
for page_data in yield_pages(url, html=html, page_num=page_num):
|
||||||
|
# page_data: url, html, page_num
|
||||||
|
first_url = None
|
||||||
|
tries_left = self._PAGE_RETRY_COUNT + 1
|
||||||
|
while tries_left > 0:
|
||||||
|
tries_left -= 1
|
||||||
|
for from_ in yield_entries(page_data[1]):
|
||||||
|
# may get the same page twice instead of empty page
|
||||||
|
# or (site bug) intead of actual next page
|
||||||
|
if not first_url:
|
||||||
|
first_url = from_['url']
|
||||||
|
if first_url == last_first_url:
|
||||||
|
# sometimes (/porntags/) the site serves the previous page
|
||||||
|
# instead but may provide the correct page after a delay
|
||||||
|
page_data = retry_page(
|
||||||
|
'Retrying duplicate page...', tries_left, page_data)
|
||||||
|
if page_data:
|
||||||
|
first_url = None
|
||||||
|
break
|
||||||
|
continue
|
||||||
|
yield from_
|
||||||
|
else:
|
||||||
|
if not first_url and 'no-result-paragarph1' in page_data[1]:
|
||||||
|
page_data = retry_page(
|
||||||
|
'Retrying empty page...', tries_left, page_data)
|
||||||
|
if page_data:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
# success/failure
|
||||||
|
break
|
||||||
|
# may get an infinite (?) sequence of empty pages
|
||||||
|
if not first_url:
|
||||||
|
break
|
||||||
|
last_first_url = first_url
|
||||||
|
|
||||||
|
def _real_extract(self, url, html=None):
|
||||||
|
# exceptionally, id may be None
|
||||||
|
m_dict = self._match_valid_url(url).groupdict()
|
||||||
|
pl_id, page_type, sort = (m_dict.get(k) for k in ('id', 'type', 'sort'))
|
||||||
|
|
||||||
|
qs = parse_qs(url)
|
||||||
|
for q, v in qs.items():
|
||||||
|
if v:
|
||||||
|
qs[q] = v[-1]
|
||||||
|
else:
|
||||||
|
del qs[q]
|
||||||
|
|
||||||
|
base_id = pl_id or 'YouPorn'
|
||||||
|
title = self._get_title_from_slug(base_id)
|
||||||
|
if page_type:
|
||||||
|
title = '%s %s' % (page_type.capitalize(), title)
|
||||||
|
base_id = [base_id.lower()]
|
||||||
|
if sort is None:
|
||||||
|
title += ' videos'
|
||||||
|
else:
|
||||||
|
title = '%s videos by %s' % (title, re.sub(r'[_-]', ' ', sort))
|
||||||
|
base_id.append(sort)
|
||||||
|
if qs:
|
||||||
|
ps = ['%s=%s' % item for item in sorted(qs.items())]
|
||||||
|
title += ' (%s)' % ','.join(ps)
|
||||||
|
base_id.extend(ps)
|
||||||
|
pl_id = '/'.join(base_id)
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
self._entries(url, pl_id, html=html,
|
||||||
|
page_num=int_or_none(qs.get('page'))),
|
||||||
|
playlist_id=pl_id, playlist_title=title)
|
||||||
|
|
||||||
|
|
||||||
|
class YouPornCategoryIE(YouPornListBase):
|
||||||
|
IE_DESC = 'YouPorn category, with sorting, filtering and pagination'
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://(?:www\.)?youporn\.com/
|
||||||
|
(?P<type>category)/(?P<id>[^/?#&]+)
|
||||||
|
(?:/(?P<sort>popular|views|rating|time|duration))?/?(?:[#?]|$)
|
||||||
|
'''
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Full list with pagination',
|
||||||
|
'url': 'https://www.youporn.com/category/lingerie/popular/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'lingerie/popular',
|
||||||
|
'title': 'Category lingerie videos by popular',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 39,
|
||||||
|
}, {
|
||||||
|
'note': 'Filtered paginated list with single page result',
|
||||||
|
'url': 'https://www.youporn.com/category/lingerie/duration/?min_minutes=10',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'lingerie/duration/min_minutes=10',
|
||||||
|
'title': 'Category lingerie videos by duration (min_minutes=10)',
|
||||||
|
},
|
||||||
|
'playlist_maxcount': 30,
|
||||||
|
}, {
|
||||||
|
'note': 'Single page of full list',
|
||||||
|
'url': 'https://www.youporn.com/category/lingerie/popular?page=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'lingerie/popular/page=1',
|
||||||
|
'title': 'Category lingerie videos by popular (page=1)',
|
||||||
|
},
|
||||||
|
'playlist_count': 30,
|
||||||
|
}]
|
||||||
|
|
||||||
|
|
||||||
|
class YouPornChannelIE(YouPornListBase):
|
||||||
|
IE_DESC = 'YouPorn channel, with sorting and pagination'
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://(?:www\.)?youporn\.com/
|
||||||
|
(?P<type>channel)/(?P<id>[^/?#&]+)
|
||||||
|
(?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$)
|
||||||
|
'''
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Full list with pagination',
|
||||||
|
'url': 'https://www.youporn.com/channel/x-feeds/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'x-feeds',
|
||||||
|
'title': 'Channel X-Feeds videos',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 37,
|
||||||
|
}, {
|
||||||
|
'note': 'Single page of full list (no filters here)',
|
||||||
|
'url': 'https://www.youporn.com/channel/x-feeds/duration?page=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'x-feeds/duration/page=1',
|
||||||
|
'title': 'Channel X-Feeds videos by duration (page=1)',
|
||||||
|
},
|
||||||
|
'playlist_count': 24,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_title_from_slug(title_slug):
|
||||||
|
return re.sub(r'_', ' ', title_slug).title()
|
||||||
|
|
||||||
|
|
||||||
|
class YouPornCollectionIE(YouPornListBase):
|
||||||
|
IE_DESC = 'YouPorn collection (user playlist), with sorting and pagination'
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://(?:www\.)?youporn\.com/
|
||||||
|
(?P<type>collection)s/videos/(?P<id>\d+)
|
||||||
|
(?:/(?P<sort>rating|views|time|duration))?/?(?:[#?]|$)
|
||||||
|
'''
|
||||||
|
_PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+in\s'
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Full list with pagination',
|
||||||
|
'url': 'https://www.youporn.com/collections/videos/33044251/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '33044251',
|
||||||
|
'title': 'Collection Sexy Lips videos',
|
||||||
|
'uploader': 'ph-littlewillyb',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 50,
|
||||||
|
}, {
|
||||||
|
'note': 'Single page of full list (no filters here)',
|
||||||
|
'url': 'https://www.youporn.com/collections/videos/33044251/time?page=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '33044251/time/page=1',
|
||||||
|
'title': 'Collection Sexy Lips videos by time (page=1)',
|
||||||
|
'uploader': 'ph-littlewillyb',
|
||||||
|
},
|
||||||
|
'playlist_count': 20,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
pl_id = self._match_id(url)
|
||||||
|
html = self._download_webpage(url, pl_id)
|
||||||
|
playlist = super(YouPornCollectionIE, self)._real_extract(url, html=html)
|
||||||
|
infos = re.sub(r'\s+', ' ', clean_html(get_element_by_class(
|
||||||
|
'collection-infos', html)) or '')
|
||||||
|
title, uploader = self._search_regex(
|
||||||
|
r'^\s*Collection: (?P<title>.+?) \d+ VIDEOS \d+ VIEWS \d+ days LAST UPDATED From: (?P<uploader>[\w_-]+)',
|
||||||
|
infos, 'title/uploader', group=('title', 'uploader'), default=(None, None))
|
||||||
|
|
||||||
|
return merge_dicts({
|
||||||
|
'title': playlist['title'].replace(playlist['id'].split('/')[0], title),
|
||||||
|
'uploader': uploader,
|
||||||
|
}, playlist) if title else playlist
|
||||||
|
|
||||||
|
|
||||||
|
class YouPornTagIE(YouPornListBase):
|
||||||
|
IE_DESC = 'YouPorn tag (porntags), with sorting, filtering and pagination'
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://(?:www\.)?youporn\.com/
|
||||||
|
porn(?P<type>tag)s/(?P<id>[^/?#&]+)
|
||||||
|
(?:/(?P<sort>views|rating|time|duration))?/?(?:[#?]|$)
|
||||||
|
'''
|
||||||
|
_PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+tagged\s'
|
||||||
|
_PAGE_RETRY_COUNT = 1
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Full list with pagination',
|
||||||
|
'url': 'https://www.youporn.com/porntags/austrian',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'austrian',
|
||||||
|
'title': 'Tag austrian videos',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 35,
|
||||||
|
'expected_warnings': ['Retrying duplicate page'],
|
||||||
|
}, {
|
||||||
|
'note': 'Filtered paginated list with single page result',
|
||||||
|
'url': 'https://www.youporn.com/porntags/austrian/duration/?min_minutes=10',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'austrian/duration/min_minutes=10',
|
||||||
|
'title': 'Tag austrian videos by duration (min_minutes=10)',
|
||||||
|
},
|
||||||
|
# number of videos per page is (row x col) 2x3 + 6x4 + 2, or + 3,
|
||||||
|
# or more, varying with number of ads; let's set max as 9x4
|
||||||
|
# NB col 1 may not be shown in non-JS page with site CSS and zoom 100%
|
||||||
|
'playlist_maxcount': 32,
|
||||||
|
'expected_warnings': ['Retrying duplicate page', 'Retrying empty page'],
|
||||||
|
}, {
|
||||||
|
'note': 'Single page of full list',
|
||||||
|
'url': 'https://www.youporn.com/porntags/austrian/?page=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'austrian/page=1',
|
||||||
|
'title': 'Tag austrian videos (page=1)',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 32,
|
||||||
|
'playlist_maxcount': 34,
|
||||||
|
'expected_warnings': ['Retrying duplicate page', 'Retrying empty page'],
|
||||||
|
}]
|
||||||
|
|
||||||
|
# YP tag navigation is broken, loses sort
|
||||||
|
def _get_next_url(self, url, pl_id, html):
|
||||||
|
next_url = super(YouPornTagIE, self)._get_next_url(url, pl_id, html)
|
||||||
|
if next_url:
|
||||||
|
n = self._match_valid_url(next_url)
|
||||||
|
if n:
|
||||||
|
s = n.groupdict().get('sort')
|
||||||
|
if s:
|
||||||
|
u = self._match_valid_url(url)
|
||||||
|
if u:
|
||||||
|
u = u.groupdict().get('sort')
|
||||||
|
if s and not u:
|
||||||
|
n = n.end('sort')
|
||||||
|
next_url = next_url[:n] + '/' + u + next_url[n:]
|
||||||
|
return next_url
|
||||||
|
|
||||||
|
|
||||||
|
class YouPornStarIE(YouPornListBase):
|
||||||
|
IE_DESC = 'YouPorn Pornstar, with description, sorting and pagination'
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://(?:www\.)?youporn\.com/
|
||||||
|
(?P<type>pornstar)/(?P<id>[^/?#&]+)
|
||||||
|
(?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$)
|
||||||
|
'''
|
||||||
|
_PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+[fF]eaturing\s'
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Full list with pagination',
|
||||||
|
'url': 'https://www.youporn.com/pornstar/daynia/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'daynia',
|
||||||
|
'title': 'Pornstar Daynia videos',
|
||||||
|
'description': r're:Daynia Rank \d+ Videos \d+ Views [\d,.]+ .+ Subscribers \d+',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 45,
|
||||||
|
}, {
|
||||||
|
'note': 'Single page of full list (no filters here)',
|
||||||
|
'url': 'https://www.youporn.com/pornstar/daynia/?page=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'daynia/page=1',
|
||||||
|
'title': 'Pornstar Daynia videos (page=1)',
|
||||||
|
'description': 're:.{180,}',
|
||||||
|
},
|
||||||
|
'playlist_count': 26,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_title_from_slug(title_slug):
|
||||||
|
return re.sub(r'_', ' ', title_slug).title()
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
pl_id = self._match_id(url)
|
||||||
|
html = self._download_webpage(url, pl_id)
|
||||||
|
playlist = super(YouPornStarIE, self)._real_extract(url, html=html)
|
||||||
|
INFO_ELEMENT_RE = r'''(?x)
|
||||||
|
<div\s[^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?pornstar-info-wrapper(?:\s+[\w$-]+|\s)*\1[^>]*>
|
||||||
|
(?P<info>[\s\S]+?)(?:</div>\s*){6,}
|
||||||
|
'''
|
||||||
|
|
||||||
|
infos = self._search_regex(INFO_ELEMENT_RE, html, 'infos', group='info', default='')
|
||||||
|
if infos:
|
||||||
|
infos = re.sub(
|
||||||
|
r'(?:\s*nl=nl)+\s*', ' ',
|
||||||
|
re.sub(r'(?u)\s+', ' ', clean_html(
|
||||||
|
re.sub('\n', 'nl=nl', infos)))).replace('ribe Subsc', '')
|
||||||
|
|
||||||
|
return merge_dicts({
|
||||||
|
'description': infos.strip() or None,
|
||||||
|
}, playlist)
|
||||||
|
|
||||||
|
|
||||||
|
class YouPornVideosIE(YouPornListBase):
|
||||||
|
IE_DESC = 'YouPorn video (browse) playlists, with sorting, filtering and pagination'
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://(?:www\.)?youporn\.com/
|
||||||
|
(?:(?P<id>browse)/)?
|
||||||
|
(?P<sort>(?(id)
|
||||||
|
(?:duration|rating|time|views)|
|
||||||
|
(?:most_(?:favou?rit|view)ed|recommended|top_rated)?))
|
||||||
|
(?:[/#?]|$)
|
||||||
|
'''
|
||||||
|
_PLAYLIST_TITLEBAR_RE = r'\s+(?:[Vv]ideos|VIDEOS)\s*$'
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Full list with pagination (too long for test)',
|
||||||
|
'url': 'https://www.youporn.com/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'youporn',
|
||||||
|
'title': 'YouPorn videos',
|
||||||
|
},
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'note': 'Full list with pagination (too long for test)',
|
||||||
|
'url': 'https://www.youporn.com/recommended',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'youporn/recommended',
|
||||||
|
'title': 'YouPorn videos by recommended',
|
||||||
|
},
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'note': 'Full list with pagination (too long for test)',
|
||||||
|
'url': 'https://www.youporn.com/top_rated',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'youporn/top_rated',
|
||||||
|
'title': 'YouPorn videos by top rated',
|
||||||
|
},
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'note': 'Full list with pagination (too long for test)',
|
||||||
|
'url': 'https://www.youporn.com/browse/time',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'browse/time',
|
||||||
|
'title': 'YouPorn videos by time',
|
||||||
|
},
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'note': 'Filtered paginated list with single page result',
|
||||||
|
'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'youporn/most_favorited/max_minutes=2/res=VR',
|
||||||
|
'title': 'YouPorn videos by most favorited (max_minutes=2,res=VR)',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 10,
|
||||||
|
'playlist_maxcount': 28,
|
||||||
|
}, {
|
||||||
|
'note': 'Filtered paginated list with several pages',
|
||||||
|
'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=5',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'youporn/most_favorited/max_minutes=5/res=VR',
|
||||||
|
'title': 'YouPorn videos by most favorited (max_minutes=5,res=VR)',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 45,
|
||||||
|
}, {
|
||||||
|
'note': 'Single page of full list',
|
||||||
|
'url': 'https://www.youporn.com/browse/time?page=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'browse/time/page=1',
|
||||||
|
'title': 'YouPorn videos by time (page=1)',
|
||||||
|
},
|
||||||
|
'playlist_count': 36,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_title_from_slug(title_slug):
|
||||||
|
return 'YouPorn' if title_slug == 'browse' else title_slug
|
||||||
|
@ -1636,7 +1636,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
try:
|
try:
|
||||||
jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
|
jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
raise ExtractorError('Unable to extract nsig jsi, player_id, func_codefunction code', cause=e)
|
raise ExtractorError('Unable to extract nsig function code', cause=e)
|
||||||
if self.get_param('youtube_print_sig_code'):
|
if self.get_param('youtube_print_sig_code'):
|
||||||
self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format(
|
self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format(
|
||||||
player_id, func_code[1]))
|
player_id, func_code[1]))
|
||||||
@ -1647,7 +1647,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
except JSInterpreter.Exception as e:
|
except JSInterpreter.Exception as e:
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'%s (%s %s)' % (
|
'%s (%s %s)' % (
|
||||||
'Unable to decode n-parameter: download likely to be throttled',
|
'Unable to decode n-parameter: expect download to be blocked or throttled',
|
||||||
error_to_compat_str(e),
|
error_to_compat_str(e),
|
||||||
traceback.format_exc()),
|
traceback.format_exc()),
|
||||||
video_id=video_id)
|
video_id=video_id)
|
||||||
@ -1658,13 +1658,52 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
def _extract_n_function_name(self, jscode):
|
def _extract_n_function_name(self, jscode):
|
||||||
func_name, idx = self._search_regex(
|
func_name, idx = self._search_regex(
|
||||||
r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?\([\w$]+\)',
|
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
|
||||||
jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
|
# or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
|
||||||
|
# or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
|
||||||
|
# or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
|
||||||
|
# old: (b=a.get("n"))&&(b=nfunc[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s*
|
||||||
|
# older: (b=a.get("n"))&&(b=nfunc(b)
|
||||||
|
r'''(?x)
|
||||||
|
\((?:[\w$()\s]+,)*?\s* # (
|
||||||
|
(?P<b>[a-z])\s*=\s* # b=
|
||||||
|
(?:
|
||||||
|
(?: # expect ,c=a.get(b) (etc)
|
||||||
|
String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
|
||||||
|
"n+"\[\s*\+?s*[\w$.]+\s*]
|
||||||
|
)\s*(?:,[\w$()\s]+(?=,))*|
|
||||||
|
(?P<old>[\w$]+) # a (old[er])
|
||||||
|
)\s*
|
||||||
|
(?(old)
|
||||||
|
# b.get("n")
|
||||||
|
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
|
||||||
|
(?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
|
||||||
|
| # ,c=a.get(b)
|
||||||
|
,\s*(?P<c>[a-z])\s*=\s*[a-z]\s*
|
||||||
|
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
|
||||||
|
(?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
|
||||||
|
)
|
||||||
|
# interstitial junk
|
||||||
|
\s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)?
|
||||||
|
(?(c)(?P=c)|(?P=b))\s*=\s* # [c|b]=
|
||||||
|
# nfunc|nfunc[idx]
|
||||||
|
(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
|
||||||
|
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
|
||||||
|
default=(None, None))
|
||||||
|
# thx bashonly: yt-dlp/yt-dlp/pull/10611
|
||||||
|
if not func_name:
|
||||||
|
self.report_warning('Falling back to generic n function search')
|
||||||
|
return self._search_regex(
|
||||||
|
r'''(?xs)
|
||||||
|
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
|
||||||
|
(?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
|
||||||
|
\s*\{(?:(?!};).)+?["']enhanced_except_
|
||||||
|
''', jscode, 'Initial JS player n function name', group='name')
|
||||||
if not idx:
|
if not idx:
|
||||||
return func_name
|
return func_name
|
||||||
|
|
||||||
return self._parse_json(self._search_regex(
|
return self._parse_json(self._search_regex(
|
||||||
r'var {0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode,
|
r'var\s+{0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode,
|
||||||
'Initial JS player n function list ({0}.{1})'.format(func_name, idx)),
|
'Initial JS player n function list ({0}.{1})'.format(func_name, idx)),
|
||||||
func_name, transform_source=js_to_json)[int(idx)]
|
func_name, transform_source=js_to_json)[int(idx)]
|
||||||
|
|
||||||
@ -1679,17 +1718,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
func_name = self._extract_n_function_name(jscode)
|
func_name = self._extract_n_function_name(jscode)
|
||||||
|
|
||||||
# For redundancy
|
func_code = jsi.extract_function_code(func_name)
|
||||||
func_code = self._search_regex(
|
|
||||||
r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
|
|
||||||
# NB: The end of the regex is intentionally kept strict
|
|
||||||
{(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name,
|
|
||||||
jscode, 'nsig function', group=('var', 'code'), default=None)
|
|
||||||
if func_code:
|
|
||||||
func_code = ([func_code[0]], func_code[1])
|
|
||||||
else:
|
|
||||||
self.write_debug('Extracting nsig function with jsinterp')
|
|
||||||
func_code = jsi.extract_function_code(func_name)
|
|
||||||
|
|
||||||
self.cache.store('youtube-nsig', player_id, func_code)
|
self.cache.store('youtube-nsig', player_id, func_code)
|
||||||
return jsi, player_id, func_code
|
return jsi, player_id, func_code
|
||||||
|
@ -14,12 +14,15 @@ from .utils import (
|
|||||||
remove_quotes,
|
remove_quotes,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
variadic,
|
variadic,
|
||||||
|
write_string,
|
||||||
)
|
)
|
||||||
from .compat import (
|
from .compat import (
|
||||||
compat_basestring,
|
compat_basestring,
|
||||||
compat_chr,
|
compat_chr,
|
||||||
compat_collections_chain_map as ChainMap,
|
compat_collections_chain_map as ChainMap,
|
||||||
|
compat_filter as filter,
|
||||||
compat_itertools_zip_longest as zip_longest,
|
compat_itertools_zip_longest as zip_longest,
|
||||||
|
compat_map as map,
|
||||||
compat_str,
|
compat_str,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -53,15 +56,16 @@ def wraps_op(op):
|
|||||||
|
|
||||||
# NB In principle NaN cannot be checked by membership.
|
# NB In principle NaN cannot be checked by membership.
|
||||||
# Here all NaN values are actually this one, so _NaN is _NaN,
|
# Here all NaN values are actually this one, so _NaN is _NaN,
|
||||||
# although _NaN != _NaN.
|
# although _NaN != _NaN. Ditto Infinity.
|
||||||
|
|
||||||
_NaN = float('nan')
|
_NaN = float('nan')
|
||||||
|
_Infinity = float('inf')
|
||||||
|
|
||||||
|
|
||||||
def _js_bit_op(op):
|
def _js_bit_op(op):
|
||||||
|
|
||||||
def zeroise(x):
|
def zeroise(x):
|
||||||
return 0 if x in (None, JS_Undefined, _NaN) else x
|
return 0 if x in (None, JS_Undefined, _NaN, _Infinity) else x
|
||||||
|
|
||||||
@wraps_op(op)
|
@wraps_op(op)
|
||||||
def wrapped(a, b):
|
def wrapped(a, b):
|
||||||
@ -84,7 +88,7 @@ def _js_arith_op(op):
|
|||||||
def _js_div(a, b):
|
def _js_div(a, b):
|
||||||
if JS_Undefined in (a, b) or not (a or b):
|
if JS_Undefined in (a, b) or not (a or b):
|
||||||
return _NaN
|
return _NaN
|
||||||
return operator.truediv(a or 0, b) if b else float('inf')
|
return operator.truediv(a or 0, b) if b else _Infinity
|
||||||
|
|
||||||
|
|
||||||
def _js_mod(a, b):
|
def _js_mod(a, b):
|
||||||
@ -220,6 +224,42 @@ class LocalNameSpace(ChainMap):
|
|||||||
return 'LocalNameSpace%s' % (self.maps, )
|
return 'LocalNameSpace%s' % (self.maps, )
|
||||||
|
|
||||||
|
|
||||||
|
class Debugger(object):
|
||||||
|
ENABLED = False
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def write(*args, **kwargs):
|
||||||
|
level = kwargs.get('level', 100)
|
||||||
|
|
||||||
|
def truncate_string(s, left, right=0):
|
||||||
|
if s is None or len(s) <= left + right:
|
||||||
|
return s
|
||||||
|
return '...'.join((s[:left - 3], s[-right:] if right else ''))
|
||||||
|
|
||||||
|
write_string('[debug] JS: {0}{1}\n'.format(
|
||||||
|
' ' * (100 - level),
|
||||||
|
' '.join(truncate_string(compat_str(x), 50, 50) for x in args)))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def wrap_interpreter(cls, f):
|
||||||
|
def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
|
||||||
|
if cls.ENABLED and stmt.strip():
|
||||||
|
cls.write(stmt, level=allow_recursion)
|
||||||
|
try:
|
||||||
|
ret, should_ret = f(self, stmt, local_vars, allow_recursion, *args, **kwargs)
|
||||||
|
except Exception as e:
|
||||||
|
if cls.ENABLED:
|
||||||
|
if isinstance(e, ExtractorError):
|
||||||
|
e = e.orig_msg
|
||||||
|
cls.write('=> Raises:', e, '<-|', stmt, level=allow_recursion)
|
||||||
|
raise
|
||||||
|
if cls.ENABLED and stmt.strip():
|
||||||
|
if should_ret or repr(ret) != stmt:
|
||||||
|
cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
|
||||||
|
return ret, should_ret
|
||||||
|
return interpret_statement
|
||||||
|
|
||||||
|
|
||||||
class JSInterpreter(object):
|
class JSInterpreter(object):
|
||||||
__named_object_counter = 0
|
__named_object_counter = 0
|
||||||
|
|
||||||
@ -307,8 +347,7 @@ class JSInterpreter(object):
|
|||||||
def __op_chars(cls):
|
def __op_chars(cls):
|
||||||
op_chars = set(';,[')
|
op_chars = set(';,[')
|
||||||
for op in cls._all_operators():
|
for op in cls._all_operators():
|
||||||
for c in op[0]:
|
op_chars.update(op[0])
|
||||||
op_chars.add(c)
|
|
||||||
return op_chars
|
return op_chars
|
||||||
|
|
||||||
def _named_object(self, namespace, obj):
|
def _named_object(self, namespace, obj):
|
||||||
@ -326,9 +365,10 @@ class JSInterpreter(object):
|
|||||||
# collections.Counter() is ~10% slower in both 2.7 and 3.9
|
# collections.Counter() is ~10% slower in both 2.7 and 3.9
|
||||||
counters = dict((k, 0) for k in _MATCHING_PARENS.values())
|
counters = dict((k, 0) for k in _MATCHING_PARENS.values())
|
||||||
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
|
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
|
||||||
in_quote, escaping, skipping = None, False, 0
|
in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
|
||||||
after_op, in_regex_char_group = True, False
|
skipping = 0
|
||||||
|
if skip_delims:
|
||||||
|
skip_delims = variadic(skip_delims)
|
||||||
for idx, char in enumerate(expr):
|
for idx, char in enumerate(expr):
|
||||||
paren_delta = 0
|
paren_delta = 0
|
||||||
if not in_quote:
|
if not in_quote:
|
||||||
@ -355,7 +395,7 @@ class JSInterpreter(object):
|
|||||||
continue
|
continue
|
||||||
elif pos == 0 and skip_delims:
|
elif pos == 0 and skip_delims:
|
||||||
here = expr[idx:]
|
here = expr[idx:]
|
||||||
for s in variadic(skip_delims):
|
for s in skip_delims:
|
||||||
if here.startswith(s) and s:
|
if here.startswith(s) and s:
|
||||||
skipping = len(s) - 1
|
skipping = len(s) - 1
|
||||||
break
|
break
|
||||||
@ -376,16 +416,17 @@ class JSInterpreter(object):
|
|||||||
if delim is None:
|
if delim is None:
|
||||||
delim = expr and _MATCHING_PARENS[expr[0]]
|
delim = expr and _MATCHING_PARENS[expr[0]]
|
||||||
separated = list(cls._separate(expr, delim, 1))
|
separated = list(cls._separate(expr, delim, 1))
|
||||||
|
|
||||||
if len(separated) < 2:
|
if len(separated) < 2:
|
||||||
raise cls.Exception('No terminating paren {delim} in {expr!r:.5500}'.format(**locals()))
|
raise cls.Exception('No terminating paren {delim} in {expr!r:.5500}'.format(**locals()))
|
||||||
return separated[0][1:].strip(), separated[1].strip()
|
return separated[0][1:].strip(), separated[1].strip()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _all_operators():
|
def _all_operators(_cached=[]):
|
||||||
return itertools.chain(
|
if not _cached:
|
||||||
# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
|
_cached.extend(itertools.chain(
|
||||||
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS)
|
# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
|
||||||
|
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS))
|
||||||
|
return _cached
|
||||||
|
|
||||||
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
|
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
|
||||||
if op in ('||', '&&'):
|
if op in ('||', '&&'):
|
||||||
@ -416,7 +457,7 @@ class JSInterpreter(object):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
if allow_undefined:
|
if allow_undefined:
|
||||||
return JS_Undefined
|
return JS_Undefined
|
||||||
raise self.Exception('Cannot get index {idx:.100}'.format(**locals()), expr=repr(obj), cause=e)
|
raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
|
||||||
|
|
||||||
def _dump(self, obj, namespace):
|
def _dump(self, obj, namespace):
|
||||||
try:
|
try:
|
||||||
@ -438,6 +479,7 @@ class JSInterpreter(object):
|
|||||||
_FINALLY_RE = re.compile(r'finally\s*\{')
|
_FINALLY_RE = re.compile(r'finally\s*\{')
|
||||||
_SWITCH_RE = re.compile(r'switch\s*\(')
|
_SWITCH_RE = re.compile(r'switch\s*\(')
|
||||||
|
|
||||||
|
@Debugger.wrap_interpreter
|
||||||
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
||||||
if allow_recursion < 0:
|
if allow_recursion < 0:
|
||||||
raise self.Exception('Recursion limit reached')
|
raise self.Exception('Recursion limit reached')
|
||||||
@ -448,6 +490,7 @@ class JSInterpreter(object):
|
|||||||
# fails on (eg) if (...) stmt1; else stmt2;
|
# fails on (eg) if (...) stmt1; else stmt2;
|
||||||
sub_statements = list(self._separate(stmt, ';')) or ['']
|
sub_statements = list(self._separate(stmt, ';')) or ['']
|
||||||
expr = stmt = sub_statements.pop().strip()
|
expr = stmt = sub_statements.pop().strip()
|
||||||
|
|
||||||
for sub_stmt in sub_statements:
|
for sub_stmt in sub_statements:
|
||||||
ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion)
|
ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion)
|
||||||
if should_return:
|
if should_return:
|
||||||
@ -511,7 +554,6 @@ class JSInterpreter(object):
|
|||||||
expr = self._dump(inner, local_vars) + outer
|
expr = self._dump(inner, local_vars) + outer
|
||||||
|
|
||||||
if expr.startswith('('):
|
if expr.startswith('('):
|
||||||
|
|
||||||
m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr)
|
m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr)
|
||||||
if m:
|
if m:
|
||||||
# short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig`
|
# short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig`
|
||||||
@ -588,8 +630,7 @@ class JSInterpreter(object):
|
|||||||
if m.group('err'):
|
if m.group('err'):
|
||||||
catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err
|
catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err
|
||||||
catch_vars = local_vars.new_child(m=catch_vars)
|
catch_vars = local_vars.new_child(m=catch_vars)
|
||||||
err = None
|
err, pending = None, self.interpret_statement(sub_expr, catch_vars, allow_recursion)
|
||||||
pending = self.interpret_statement(sub_expr, catch_vars, allow_recursion)
|
|
||||||
|
|
||||||
m = self._FINALLY_RE.match(expr)
|
m = self._FINALLY_RE.match(expr)
|
||||||
if m:
|
if m:
|
||||||
@ -693,7 +734,7 @@ class JSInterpreter(object):
|
|||||||
(?P<op>{_OPERATOR_RE})?
|
(?P<op>{_OPERATOR_RE})?
|
||||||
=(?!=)(?P<expr>.*)$
|
=(?!=)(?P<expr>.*)$
|
||||||
)|(?P<return>
|
)|(?P<return>
|
||||||
(?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$
|
(?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
|
||||||
)|(?P<indexing>
|
)|(?P<indexing>
|
||||||
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
|
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
|
||||||
)|(?P<attribute>
|
)|(?P<attribute>
|
||||||
@ -727,11 +768,12 @@ class JSInterpreter(object):
|
|||||||
raise JS_Break()
|
raise JS_Break()
|
||||||
elif expr == 'continue':
|
elif expr == 'continue':
|
||||||
raise JS_Continue()
|
raise JS_Continue()
|
||||||
|
|
||||||
elif expr == 'undefined':
|
elif expr == 'undefined':
|
||||||
return JS_Undefined, should_return
|
return JS_Undefined, should_return
|
||||||
elif expr == 'NaN':
|
elif expr == 'NaN':
|
||||||
return _NaN, should_return
|
return _NaN, should_return
|
||||||
|
elif expr == 'Infinity':
|
||||||
|
return _Infinity, should_return
|
||||||
|
|
||||||
elif md.get('return'):
|
elif md.get('return'):
|
||||||
return local_vars[m.group('name')], should_return
|
return local_vars[m.group('name')], should_return
|
||||||
@ -760,18 +802,31 @@ class JSInterpreter(object):
|
|||||||
right_expr = separated.pop()
|
right_expr = separated.pop()
|
||||||
# handle operators that are both unary and binary, minimal BODMAS
|
# handle operators that are both unary and binary, minimal BODMAS
|
||||||
if op in ('+', '-'):
|
if op in ('+', '-'):
|
||||||
|
# simplify/adjust consecutive instances of these operators
|
||||||
undone = 0
|
undone = 0
|
||||||
while len(separated) > 1 and not separated[-1].strip():
|
separated = [s.strip() for s in separated]
|
||||||
|
while len(separated) > 1 and not separated[-1]:
|
||||||
undone += 1
|
undone += 1
|
||||||
separated.pop()
|
separated.pop()
|
||||||
if op == '-' and undone % 2 != 0:
|
if op == '-' and undone % 2 != 0:
|
||||||
right_expr = op + right_expr
|
right_expr = op + right_expr
|
||||||
left_val = separated[-1]
|
elif op == '+':
|
||||||
|
while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS:
|
||||||
|
right_expr = separated.pop() + right_expr
|
||||||
|
if separated[-1][-1:] in self.OP_CHARS:
|
||||||
|
right_expr = separated.pop() + right_expr
|
||||||
|
# hanging op at end of left => unary + (strip) or - (push right)
|
||||||
|
left_val = separated[-1] if separated else ''
|
||||||
for dm_op in ('*', '%', '/', '**'):
|
for dm_op in ('*', '%', '/', '**'):
|
||||||
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
|
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
|
||||||
if len(bodmas) > 1 and not bodmas[-1].strip():
|
if len(bodmas) > 1 and not bodmas[-1].strip():
|
||||||
expr = op.join(separated) + op + right_expr
|
expr = op.join(separated) + op + right_expr
|
||||||
right_expr = None
|
if len(separated) > 1:
|
||||||
|
separated.pop()
|
||||||
|
right_expr = op.join((left_val, right_expr))
|
||||||
|
else:
|
||||||
|
separated = [op.join((left_val, right_expr))]
|
||||||
|
right_expr = None
|
||||||
break
|
break
|
||||||
if right_expr is None:
|
if right_expr is None:
|
||||||
continue
|
continue
|
||||||
@ -795,12 +850,15 @@ class JSInterpreter(object):
|
|||||||
memb = member
|
memb = member
|
||||||
raise self.Exception('{memb} {msg}'.format(**locals()), expr=expr)
|
raise self.Exception('{memb} {msg}'.format(**locals()), expr=expr)
|
||||||
|
|
||||||
def eval_method():
|
def eval_method(variable, member):
|
||||||
if (variable, member) == ('console', 'debug'):
|
if (variable, member) == ('console', 'debug'):
|
||||||
|
if Debugger.ENABLED:
|
||||||
|
Debugger.write(self.interpret_expression('[{}]'.format(arg_str), local_vars, allow_recursion))
|
||||||
return
|
return
|
||||||
types = {
|
types = {
|
||||||
'String': compat_str,
|
'String': compat_str,
|
||||||
'Math': float,
|
'Math': float,
|
||||||
|
'Array': list,
|
||||||
}
|
}
|
||||||
obj = local_vars.get(variable)
|
obj = local_vars.get(variable)
|
||||||
if obj in (JS_Undefined, None):
|
if obj in (JS_Undefined, None):
|
||||||
@ -826,12 +884,29 @@ class JSInterpreter(object):
|
|||||||
self.interpret_expression(v, local_vars, allow_recursion)
|
self.interpret_expression(v, local_vars, allow_recursion)
|
||||||
for v in self._separate(arg_str)]
|
for v in self._separate(arg_str)]
|
||||||
|
|
||||||
if obj == compat_str:
|
# Fixup prototype call
|
||||||
|
if isinstance(obj, type):
|
||||||
|
new_member, rest = member.partition('.')[0::2]
|
||||||
|
if new_member == 'prototype':
|
||||||
|
new_member, func_prototype = rest.partition('.')[0::2]
|
||||||
|
assertion(argvals, 'takes one or more arguments')
|
||||||
|
assertion(isinstance(argvals[0], obj), 'must bind to type {0}'.format(obj))
|
||||||
|
if func_prototype == 'call':
|
||||||
|
obj = argvals.pop(0)
|
||||||
|
elif func_prototype == 'apply':
|
||||||
|
assertion(len(argvals) == 2, 'takes two arguments')
|
||||||
|
obj, argvals = argvals
|
||||||
|
assertion(isinstance(argvals, list), 'second argument must be a list')
|
||||||
|
else:
|
||||||
|
raise self.Exception('Unsupported Function method ' + func_prototype, expr)
|
||||||
|
member = new_member
|
||||||
|
|
||||||
|
if obj is compat_str:
|
||||||
if member == 'fromCharCode':
|
if member == 'fromCharCode':
|
||||||
assertion(argvals, 'takes one or more arguments')
|
assertion(argvals, 'takes one or more arguments')
|
||||||
return ''.join(map(compat_chr, argvals))
|
return ''.join(map(compat_chr, argvals))
|
||||||
raise self.Exception('Unsupported string method ' + member, expr=expr)
|
raise self.Exception('Unsupported string method ' + member, expr=expr)
|
||||||
elif obj == float:
|
elif obj is float:
|
||||||
if member == 'pow':
|
if member == 'pow':
|
||||||
assertion(len(argvals) == 2, 'takes two arguments')
|
assertion(len(argvals) == 2, 'takes two arguments')
|
||||||
return argvals[0] ** argvals[1]
|
return argvals[0] ** argvals[1]
|
||||||
@ -850,18 +925,25 @@ class JSInterpreter(object):
|
|||||||
obj.reverse()
|
obj.reverse()
|
||||||
return obj
|
return obj
|
||||||
elif member == 'slice':
|
elif member == 'slice':
|
||||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
assertion(isinstance(obj, (list, compat_str)), 'must be applied on a list or string')
|
||||||
assertion(len(argvals) == 1, 'takes exactly one argument')
|
# From [1]:
|
||||||
return obj[argvals[0]:]
|
# .slice() - like [:]
|
||||||
|
# .slice(n) - like [n:] (not [slice(n)]
|
||||||
|
# .slice(m, n) - like [m:n] or [slice(m, n)]
|
||||||
|
# [1] https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/slice
|
||||||
|
assertion(len(argvals) <= 2, 'takes between 0 and 2 arguments')
|
||||||
|
if len(argvals) < 2:
|
||||||
|
argvals += (None,)
|
||||||
|
return obj[slice(*argvals)]
|
||||||
elif member == 'splice':
|
elif member == 'splice':
|
||||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||||
assertion(argvals, 'takes one or more arguments')
|
assertion(argvals, 'takes one or more arguments')
|
||||||
index, howMany = map(int, (argvals + [len(obj)])[:2])
|
index, how_many = map(int, (argvals + [len(obj)])[:2])
|
||||||
if index < 0:
|
if index < 0:
|
||||||
index += len(obj)
|
index += len(obj)
|
||||||
add_items = argvals[2:]
|
add_items = argvals[2:]
|
||||||
res = []
|
res = []
|
||||||
for i in range(index, min(index + howMany, len(obj))):
|
for _ in range(index, min(index + how_many, len(obj))):
|
||||||
res.append(obj.pop(index))
|
res.append(obj.pop(index))
|
||||||
for i, item in enumerate(add_items):
|
for i, item in enumerate(add_items):
|
||||||
obj.insert(index + i, item)
|
obj.insert(index + i, item)
|
||||||
@ -919,11 +1001,11 @@ class JSInterpreter(object):
|
|||||||
|
|
||||||
if remaining:
|
if remaining:
|
||||||
ret, should_abort = self.interpret_statement(
|
ret, should_abort = self.interpret_statement(
|
||||||
self._named_object(local_vars, eval_method()) + remaining,
|
self._named_object(local_vars, eval_method(variable, member)) + remaining,
|
||||||
local_vars, allow_recursion)
|
local_vars, allow_recursion)
|
||||||
return ret, should_return or should_abort
|
return ret, should_return or should_abort
|
||||||
else:
|
else:
|
||||||
return eval_method(), should_return
|
return eval_method(variable, member), should_return
|
||||||
|
|
||||||
elif md.get('function'):
|
elif md.get('function'):
|
||||||
fname = m.group('fname')
|
fname = m.group('fname')
|
||||||
@ -951,28 +1033,25 @@ class JSInterpreter(object):
|
|||||||
def extract_object(self, objname):
|
def extract_object(self, objname):
|
||||||
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
|
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
|
||||||
obj = {}
|
obj = {}
|
||||||
fields = None
|
fields = next(filter(None, (
|
||||||
for obj_m in re.finditer(
|
obj_m.group('fields') for obj_m in re.finditer(
|
||||||
r'''(?xs)
|
r'''(?xs)
|
||||||
{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s*
|
{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s*
|
||||||
(?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*)
|
(?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*)
|
||||||
}}\s*;
|
}}\s*;
|
||||||
'''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE),
|
'''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE),
|
||||||
self.code):
|
self.code))), None)
|
||||||
fields = obj_m.group('fields')
|
if not fields:
|
||||||
if fields:
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
raise self.Exception('Could not find object ' + objname)
|
raise self.Exception('Could not find object ' + objname)
|
||||||
# Currently, it only supports function definitions
|
# Currently, it only supports function definitions
|
||||||
fields_m = re.finditer(
|
for f in re.finditer(
|
||||||
r'''(?x)
|
r'''(?x)
|
||||||
(?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
|
(?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
|
||||||
''' % (_FUNC_NAME_RE, _NAME_RE),
|
''' % (_FUNC_NAME_RE, _NAME_RE),
|
||||||
fields)
|
fields):
|
||||||
for f in fields_m:
|
|
||||||
argnames = self.build_arglist(f.group('args'))
|
argnames = self.build_arglist(f.group('args'))
|
||||||
obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code'))
|
name = remove_quotes(f.group('key'))
|
||||||
|
obj[name] = function_with_repr(self.build_function(argnames, f.group('code')), 'F<{0}>'.format(name))
|
||||||
|
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
@ -1007,7 +1086,7 @@ class JSInterpreter(object):
|
|||||||
def extract_function(self, funcname):
|
def extract_function(self, funcname):
|
||||||
return function_with_repr(
|
return function_with_repr(
|
||||||
self.extract_function_from_code(*self.extract_function_code(funcname)),
|
self.extract_function_from_code(*self.extract_function_code(funcname)),
|
||||||
'F<%s>' % (funcname, ))
|
'F<%s>' % (funcname,))
|
||||||
|
|
||||||
def extract_function_from_code(self, argnames, code, *global_stack):
|
def extract_function_from_code(self, argnames, code, *global_stack):
|
||||||
local_vars = {}
|
local_vars = {}
|
||||||
@ -1016,7 +1095,7 @@ class JSInterpreter(object):
|
|||||||
if mobj is None:
|
if mobj is None:
|
||||||
break
|
break
|
||||||
start, body_start = mobj.span()
|
start, body_start = mobj.span()
|
||||||
body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
|
body, remaining = self._separate_at_paren(code[body_start - 1:])
|
||||||
name = self._named_object(local_vars, self.extract_function_from_code(
|
name = self._named_object(local_vars, self.extract_function_from_code(
|
||||||
[x.strip() for x in mobj.group('args').split(',')],
|
[x.strip() for x in mobj.group('args').split(',')],
|
||||||
body, local_vars, *global_stack))
|
body, local_vars, *global_stack))
|
||||||
@ -1044,8 +1123,7 @@ class JSInterpreter(object):
|
|||||||
argnames = tuple(argnames)
|
argnames = tuple(argnames)
|
||||||
|
|
||||||
def resf(args, kwargs={}, allow_recursion=100):
|
def resf(args, kwargs={}, allow_recursion=100):
|
||||||
global_stack[0].update(
|
global_stack[0].update(zip_longest(argnames, args, fillvalue=None))
|
||||||
zip_longest(argnames, args, fillvalue=None))
|
|
||||||
global_stack[0].update(kwargs)
|
global_stack[0].update(kwargs)
|
||||||
var_stack = LocalNameSpace(*global_stack)
|
var_stack = LocalNameSpace(*global_stack)
|
||||||
ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
|
ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
|
||||||
|
@ -533,6 +533,10 @@ def parseOpts(overrideArguments=None):
|
|||||||
'--no-check-certificate',
|
'--no-check-certificate',
|
||||||
action='store_true', dest='no_check_certificate', default=False,
|
action='store_true', dest='no_check_certificate', default=False,
|
||||||
help='Suppress HTTPS certificate validation')
|
help='Suppress HTTPS certificate validation')
|
||||||
|
workarounds.add_option(
|
||||||
|
'--no-check-extensions',
|
||||||
|
action='store_true', dest='no_check_extensions', default=False,
|
||||||
|
help='Suppress file extension validation')
|
||||||
workarounds.add_option(
|
workarounds.add_option(
|
||||||
'--prefer-insecure',
|
'--prefer-insecure',
|
||||||
'--prefer-unsecure', action='store_true', dest='prefer_insecure',
|
'--prefer-unsecure', action='store_true', dest='prefer_insecure',
|
||||||
@ -720,7 +724,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
filesystem.add_option(
|
filesystem.add_option(
|
||||||
'-w', '--no-overwrites',
|
'-w', '--no-overwrites',
|
||||||
action='store_true', dest='nooverwrites', default=False,
|
action='store_true', dest='nooverwrites', default=False,
|
||||||
help='Do not overwrite files')
|
help='Do not overwrite files. (For the opposite, see --no-continue.)')
|
||||||
filesystem.add_option(
|
filesystem.add_option(
|
||||||
'-c', '--continue',
|
'-c', '--continue',
|
||||||
action='store_true', dest='continue_dl', default=True,
|
action='store_true', dest='continue_dl', default=True,
|
||||||
@ -728,7 +732,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
filesystem.add_option(
|
filesystem.add_option(
|
||||||
'--no-continue',
|
'--no-continue',
|
||||||
action='store_false', dest='continue_dl',
|
action='store_false', dest='continue_dl',
|
||||||
help='Do not resume partially downloaded files (restart from beginning)')
|
help='Do not resume partially downloaded files: Instead, restart from the beginning and ruthlessly overwrite existing files.')
|
||||||
filesystem.add_option(
|
filesystem.add_option(
|
||||||
'--no-part',
|
'--no-part',
|
||||||
action='store_true', dest='nopart', default=False,
|
action='store_true', dest='nopart', default=False,
|
||||||
|
10
youtube_dl/traversal.py
Normal file
10
youtube_dl/traversal.py
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# TODO: move these utils.fns here and move import to utils
|
||||||
|
# flake8: noqa
|
||||||
|
from .utils import (
|
||||||
|
dict_get,
|
||||||
|
get_first,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
)
|
@ -49,11 +49,14 @@ from .compat import (
|
|||||||
compat_cookiejar,
|
compat_cookiejar,
|
||||||
compat_ctypes_WINFUNCTYPE,
|
compat_ctypes_WINFUNCTYPE,
|
||||||
compat_datetime_timedelta_total_seconds,
|
compat_datetime_timedelta_total_seconds,
|
||||||
|
compat_etree_Element,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
|
compat_etree_iterfind,
|
||||||
compat_expanduser,
|
compat_expanduser,
|
||||||
compat_html_entities,
|
compat_html_entities,
|
||||||
compat_html_entities_html5,
|
compat_html_entities_html5,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
|
compat_http_cookies,
|
||||||
compat_integer_types,
|
compat_integer_types,
|
||||||
compat_kwargs,
|
compat_kwargs,
|
||||||
compat_ncompress as ncompress,
|
compat_ncompress as ncompress,
|
||||||
@ -1714,21 +1717,6 @@ TIMEZONE_NAMES = {
|
|||||||
'PST': -8, 'PDT': -7 # Pacific
|
'PST': -8, 'PDT': -7 # Pacific
|
||||||
}
|
}
|
||||||
|
|
||||||
KNOWN_EXTENSIONS = (
|
|
||||||
'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
|
|
||||||
'flv', 'f4v', 'f4a', 'f4b',
|
|
||||||
'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
|
|
||||||
'mkv', 'mka', 'mk3d',
|
|
||||||
'avi', 'divx',
|
|
||||||
'mov',
|
|
||||||
'asf', 'wmv', 'wma',
|
|
||||||
'3gp', '3g2',
|
|
||||||
'mp3',
|
|
||||||
'flac',
|
|
||||||
'ape',
|
|
||||||
'wav',
|
|
||||||
'f4f', 'f4m', 'm3u8', 'smil')
|
|
||||||
|
|
||||||
# needed for sanitizing filenames in restricted mode
|
# needed for sanitizing filenames in restricted mode
|
||||||
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
|
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
|
||||||
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
|
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
|
||||||
@ -2403,7 +2391,7 @@ class ExtractorError(YoutubeDLError):
|
|||||||
""" tb, if given, is the original traceback (so that it can be printed out).
|
""" tb, if given, is the original traceback (so that it can be printed out).
|
||||||
If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
|
If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
|
||||||
"""
|
"""
|
||||||
|
self.orig_msg = msg
|
||||||
if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
|
if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
|
||||||
expected = True
|
expected = True
|
||||||
if video_id is not None:
|
if video_id is not None:
|
||||||
@ -3956,19 +3944,22 @@ def parse_duration(s):
|
|||||||
return duration
|
return duration
|
||||||
|
|
||||||
|
|
||||||
def prepend_extension(filename, ext, expected_real_ext=None):
|
def _change_extension(prepend, filename, ext, expected_real_ext=None):
|
||||||
name, real_ext = os.path.splitext(filename)
|
name, real_ext = os.path.splitext(filename)
|
||||||
return (
|
sanitize_extension = _UnsafeExtensionError.sanitize_extension
|
||||||
'{0}.{1}{2}'.format(name, ext, real_ext)
|
|
||||||
if not expected_real_ext or real_ext[1:] == expected_real_ext
|
if not expected_real_ext or real_ext.partition('.')[0::2] == ('', expected_real_ext):
|
||||||
else '{0}.{1}'.format(filename, ext))
|
filename = name
|
||||||
|
if prepend and real_ext:
|
||||||
|
sanitize_extension(ext, prepend=prepend)
|
||||||
|
return ''.join((filename, '.', ext, real_ext))
|
||||||
|
|
||||||
|
# Mitigate path traversal and file impersonation attacks
|
||||||
|
return '.'.join((filename, sanitize_extension(ext)))
|
||||||
|
|
||||||
|
|
||||||
def replace_extension(filename, ext, expected_real_ext=None):
|
prepend_extension = functools.partial(_change_extension, True)
|
||||||
name, real_ext = os.path.splitext(filename)
|
replace_extension = functools.partial(_change_extension, False)
|
||||||
return '{0}.{1}'.format(
|
|
||||||
name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
|
|
||||||
ext)
|
|
||||||
|
|
||||||
|
|
||||||
def check_executable(exe, args=[]):
|
def check_executable(exe, args=[]):
|
||||||
@ -6253,15 +6244,16 @@ if __debug__:
|
|||||||
|
|
||||||
def traverse_obj(obj, *paths, **kwargs):
|
def traverse_obj(obj, *paths, **kwargs):
|
||||||
"""
|
"""
|
||||||
Safely traverse nested `dict`s and `Iterable`s
|
Safely traverse nested `dict`s and `Iterable`s, etc
|
||||||
|
|
||||||
>>> obj = [{}, {"key": "value"}]
|
>>> obj = [{}, {"key": "value"}]
|
||||||
>>> traverse_obj(obj, (1, "key"))
|
>>> traverse_obj(obj, (1, "key"))
|
||||||
"value"
|
'value'
|
||||||
|
|
||||||
Each of the provided `paths` is tested and the first producing a valid result will be returned.
|
Each of the provided `paths` is tested and the first producing a valid result will be returned.
|
||||||
The next path will also be tested if the path branched but no results could be found.
|
The next path will also be tested if the path branched but no results could be found.
|
||||||
Supported values for traversal are `Mapping`, `Iterable` and `re.Match`.
|
Supported values for traversal are `Mapping`, `Iterable`, `re.Match`, `xml.etree.ElementTree`
|
||||||
|
(xpath) and `http.cookies.Morsel`.
|
||||||
Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded.
|
Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded.
|
||||||
|
|
||||||
The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
|
The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
|
||||||
@ -6269,8 +6261,9 @@ def traverse_obj(obj, *paths, **kwargs):
|
|||||||
The keys in the path can be one of:
|
The keys in the path can be one of:
|
||||||
- `None`: Return the current object.
|
- `None`: Return the current object.
|
||||||
- `set`: Requires the only item in the set to be a type or function,
|
- `set`: Requires the only item in the set to be a type or function,
|
||||||
like `{type}`/`{func}`. If a `type`, returns only values
|
like `{type}`/`{type, type, ...}`/`{func}`. If one or more `type`s,
|
||||||
of this type. If a function, returns `func(obj)`.
|
return only values that have one of the types. If a function,
|
||||||
|
return `func(obj)`.
|
||||||
- `str`/`int`: Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
|
- `str`/`int`: Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
|
||||||
- `slice`: Branch out and return all values in `obj[key]`.
|
- `slice`: Branch out and return all values in `obj[key]`.
|
||||||
- `Ellipsis`: Branch out and return a list of all values.
|
- `Ellipsis`: Branch out and return a list of all values.
|
||||||
@ -6282,8 +6275,10 @@ def traverse_obj(obj, *paths, **kwargs):
|
|||||||
For `Iterable`s, `key` is the enumeration count of the value.
|
For `Iterable`s, `key` is the enumeration count of the value.
|
||||||
For `re.Match`es, `key` is the group number (0 = full match)
|
For `re.Match`es, `key` is the group number (0 = full match)
|
||||||
as well as additionally any group names, if given.
|
as well as additionally any group names, if given.
|
||||||
- `dict` Transform the current object and return a matching dict.
|
- `dict`: Transform the current object and return a matching dict.
|
||||||
Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
|
Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
|
||||||
|
- `any`-builtin: Take the first matching object and return it, resetting branching.
|
||||||
|
- `all`-builtin: Take all matching objects and return them as a list, resetting branching.
|
||||||
|
|
||||||
`tuple`, `list`, and `dict` all support nested paths and branches.
|
`tuple`, `list`, and `dict` all support nested paths and branches.
|
||||||
|
|
||||||
@ -6299,10 +6294,8 @@ def traverse_obj(obj, *paths, **kwargs):
|
|||||||
@param get_all If `False`, return the first matching result, otherwise all matching ones.
|
@param get_all If `False`, return the first matching result, otherwise all matching ones.
|
||||||
@param casesense If `False`, consider string dictionary keys as case insensitive.
|
@param casesense If `False`, consider string dictionary keys as case insensitive.
|
||||||
|
|
||||||
The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API
|
The following is only meant to be used by YoutubeDL.prepare_outtmpl and is not part of the API
|
||||||
|
|
||||||
@param _is_user_input Whether the keys are generated from user input.
|
|
||||||
If `True` strings get converted to `int`/`slice` if needed.
|
|
||||||
@param _traverse_string Whether to traverse into objects as strings.
|
@param _traverse_string Whether to traverse into objects as strings.
|
||||||
If `True`, any non-compatible object will first be
|
If `True`, any non-compatible object will first be
|
||||||
converted into a string and then traversed into.
|
converted into a string and then traversed into.
|
||||||
@ -6322,7 +6315,6 @@ def traverse_obj(obj, *paths, **kwargs):
|
|||||||
expected_type = kwargs.get('expected_type')
|
expected_type = kwargs.get('expected_type')
|
||||||
get_all = kwargs.get('get_all', True)
|
get_all = kwargs.get('get_all', True)
|
||||||
casesense = kwargs.get('casesense', True)
|
casesense = kwargs.get('casesense', True)
|
||||||
_is_user_input = kwargs.get('_is_user_input', False)
|
|
||||||
_traverse_string = kwargs.get('_traverse_string', False)
|
_traverse_string = kwargs.get('_traverse_string', False)
|
||||||
|
|
||||||
# instant compat
|
# instant compat
|
||||||
@ -6336,10 +6328,8 @@ def traverse_obj(obj, *paths, **kwargs):
|
|||||||
type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))
|
type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))
|
||||||
|
|
||||||
def lookup_or_none(v, k, getter=None):
|
def lookup_or_none(v, k, getter=None):
|
||||||
try:
|
with compat_contextlib_suppress(LookupError):
|
||||||
return getter(v, k) if getter else v[k]
|
return getter(v, k) if getter else v[k]
|
||||||
except IndexError:
|
|
||||||
return None
|
|
||||||
|
|
||||||
def from_iterable(iterables):
|
def from_iterable(iterables):
|
||||||
# chain.from_iterable(['ABC', 'DEF']) --> A B C D E F
|
# chain.from_iterable(['ABC', 'DEF']) --> A B C D E F
|
||||||
@ -6361,12 +6351,13 @@ def traverse_obj(obj, *paths, **kwargs):
|
|||||||
result = obj
|
result = obj
|
||||||
|
|
||||||
elif isinstance(key, set):
|
elif isinstance(key, set):
|
||||||
assert len(key) == 1, 'Set should only be used to wrap a single item'
|
assert len(key) >= 1, 'At least one item is required in a `set` key'
|
||||||
item = next(iter(key))
|
if all(isinstance(item, type) for item in key):
|
||||||
if isinstance(item, type):
|
result = obj if isinstance(obj, tuple(key)) else None
|
||||||
result = obj if isinstance(obj, item) else None
|
|
||||||
else:
|
else:
|
||||||
result = try_call(item, args=(obj,))
|
item = next(iter(key))
|
||||||
|
assert len(key) == 1, 'Multiple items in a `set` key must all be types'
|
||||||
|
result = try_call(item, args=(obj,)) if not isinstance(item, type) else None
|
||||||
|
|
||||||
elif isinstance(key, (list, tuple)):
|
elif isinstance(key, (list, tuple)):
|
||||||
branching = True
|
branching = True
|
||||||
@ -6375,9 +6366,11 @@ def traverse_obj(obj, *paths, **kwargs):
|
|||||||
|
|
||||||
elif key is Ellipsis:
|
elif key is Ellipsis:
|
||||||
branching = True
|
branching = True
|
||||||
|
if isinstance(obj, compat_http_cookies.Morsel):
|
||||||
|
obj = dict(obj, key=obj.key, value=obj.value)
|
||||||
if isinstance(obj, compat_collections_abc.Mapping):
|
if isinstance(obj, compat_collections_abc.Mapping):
|
||||||
result = obj.values()
|
result = obj.values()
|
||||||
elif is_iterable_like(obj):
|
elif is_iterable_like(obj, (compat_collections_abc.Iterable, compat_etree_Element)):
|
||||||
result = obj
|
result = obj
|
||||||
elif isinstance(obj, compat_re_Match):
|
elif isinstance(obj, compat_re_Match):
|
||||||
result = obj.groups()
|
result = obj.groups()
|
||||||
@ -6389,9 +6382,11 @@ def traverse_obj(obj, *paths, **kwargs):
|
|||||||
|
|
||||||
elif callable(key):
|
elif callable(key):
|
||||||
branching = True
|
branching = True
|
||||||
|
if isinstance(obj, compat_http_cookies.Morsel):
|
||||||
|
obj = dict(obj, key=obj.key, value=obj.value)
|
||||||
if isinstance(obj, compat_collections_abc.Mapping):
|
if isinstance(obj, compat_collections_abc.Mapping):
|
||||||
iter_obj = obj.items()
|
iter_obj = obj.items()
|
||||||
elif is_iterable_like(obj):
|
elif is_iterable_like(obj, (compat_collections_abc.Iterable, compat_etree_Element)):
|
||||||
iter_obj = enumerate(obj)
|
iter_obj = enumerate(obj)
|
||||||
elif isinstance(obj, compat_re_Match):
|
elif isinstance(obj, compat_re_Match):
|
||||||
iter_obj = itertools.chain(
|
iter_obj = itertools.chain(
|
||||||
@ -6413,6 +6408,8 @@ def traverse_obj(obj, *paths, **kwargs):
|
|||||||
if v is not None or default is not NO_DEFAULT) or None
|
if v is not None or default is not NO_DEFAULT) or None
|
||||||
|
|
||||||
elif isinstance(obj, compat_collections_abc.Mapping):
|
elif isinstance(obj, compat_collections_abc.Mapping):
|
||||||
|
if isinstance(obj, compat_http_cookies.Morsel):
|
||||||
|
obj = dict(obj, key=obj.key, value=obj.value)
|
||||||
result = (try_call(obj.get, args=(key,))
|
result = (try_call(obj.get, args=(key,))
|
||||||
if casesense or try_call(obj.__contains__, args=(key,))
|
if casesense or try_call(obj.__contains__, args=(key,))
|
||||||
else next((v for k, v in obj.items() if casefold(k) == key), None))
|
else next((v for k, v in obj.items() if casefold(k) == key), None))
|
||||||
@ -6430,12 +6427,40 @@ def traverse_obj(obj, *paths, **kwargs):
|
|||||||
else:
|
else:
|
||||||
result = None
|
result = None
|
||||||
if isinstance(key, (int, slice)):
|
if isinstance(key, (int, slice)):
|
||||||
if is_iterable_like(obj, compat_collections_abc.Sequence):
|
if is_iterable_like(obj, (compat_collections_abc.Sequence, compat_etree_Element)):
|
||||||
branching = isinstance(key, slice)
|
branching = isinstance(key, slice)
|
||||||
result = lookup_or_none(obj, key)
|
result = lookup_or_none(obj, key)
|
||||||
elif _traverse_string:
|
elif _traverse_string:
|
||||||
result = lookup_or_none(str(obj), key)
|
result = lookup_or_none(str(obj), key)
|
||||||
|
|
||||||
|
elif isinstance(obj, compat_etree_Element) and isinstance(key, str):
|
||||||
|
xpath, _, special = key.rpartition('/')
|
||||||
|
if not special.startswith('@') and not special.endswith('()'):
|
||||||
|
xpath = key
|
||||||
|
special = None
|
||||||
|
|
||||||
|
# Allow abbreviations of relative paths, absolute paths error
|
||||||
|
if xpath.startswith('/'):
|
||||||
|
xpath = '.' + xpath
|
||||||
|
elif xpath and not xpath.startswith('./'):
|
||||||
|
xpath = './' + xpath
|
||||||
|
|
||||||
|
def apply_specials(element):
|
||||||
|
if special is None:
|
||||||
|
return element
|
||||||
|
if special == '@':
|
||||||
|
return element.attrib
|
||||||
|
if special.startswith('@'):
|
||||||
|
return try_call(element.attrib.get, args=(special[1:],))
|
||||||
|
if special == 'text()':
|
||||||
|
return element.text
|
||||||
|
raise SyntaxError('apply_specials is missing case for {0!r}'.format(special))
|
||||||
|
|
||||||
|
if xpath:
|
||||||
|
result = list(map(apply_specials, compat_etree_iterfind(obj, xpath)))
|
||||||
|
else:
|
||||||
|
result = apply_specials(obj)
|
||||||
|
|
||||||
return branching, result if branching else (result,)
|
return branching, result if branching else (result,)
|
||||||
|
|
||||||
def lazy_last(iterable):
|
def lazy_last(iterable):
|
||||||
@ -6456,17 +6481,18 @@ def traverse_obj(obj, *paths, **kwargs):
|
|||||||
|
|
||||||
key = None
|
key = None
|
||||||
for last, key in lazy_last(variadic(path, (str, bytes, dict, set))):
|
for last, key in lazy_last(variadic(path, (str, bytes, dict, set))):
|
||||||
if _is_user_input and isinstance(key, str):
|
|
||||||
if key == ':':
|
|
||||||
key = Ellipsis
|
|
||||||
elif ':' in key:
|
|
||||||
key = slice(*map(int_or_none, key.split(':')))
|
|
||||||
elif int_or_none(key) is not None:
|
|
||||||
key = int(key)
|
|
||||||
|
|
||||||
if not casesense and isinstance(key, str):
|
if not casesense and isinstance(key, str):
|
||||||
key = compat_casefold(key)
|
key = compat_casefold(key)
|
||||||
|
|
||||||
|
if key in (any, all):
|
||||||
|
has_branched = False
|
||||||
|
filtered_objs = (obj for obj in objs if obj not in (None, {}))
|
||||||
|
if key is any:
|
||||||
|
objs = (next(filtered_objs, None),)
|
||||||
|
else:
|
||||||
|
objs = (list(filtered_objs),)
|
||||||
|
continue
|
||||||
|
|
||||||
if __debug__ and callable(key):
|
if __debug__ and callable(key):
|
||||||
# Verify function signature
|
# Verify function signature
|
||||||
_try_bind_args(key, None, None)
|
_try_bind_args(key, None, None)
|
||||||
@ -6505,9 +6531,9 @@ def traverse_obj(obj, *paths, **kwargs):
|
|||||||
return None if default is NO_DEFAULT else default
|
return None if default is NO_DEFAULT else default
|
||||||
|
|
||||||
|
|
||||||
def T(x):
|
def T(*x):
|
||||||
""" For use in yt-dl instead of {type} or set((type,)) """
|
""" For use in yt-dl instead of {type, ...} or set((type, ...)) """
|
||||||
return set((x,))
|
return set(x)
|
||||||
|
|
||||||
|
|
||||||
def get_first(obj, keys, **kwargs):
|
def get_first(obj, keys, **kwargs):
|
||||||
@ -6523,3 +6549,169 @@ def join_nonempty(*values, **kwargs):
|
|||||||
if from_dict is not None:
|
if from_dict is not None:
|
||||||
values = (traverse_obj(from_dict, variadic(v)) for v in values)
|
values = (traverse_obj(from_dict, variadic(v)) for v in values)
|
||||||
return delim.join(map(compat_str, filter(None, values)))
|
return delim.join(map(compat_str, filter(None, values)))
|
||||||
|
|
||||||
|
|
||||||
|
class Namespace(object):
|
||||||
|
"""Immutable namespace"""
|
||||||
|
|
||||||
|
def __init__(self, **kw_attr):
|
||||||
|
self.__dict__.update(kw_attr)
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
return iter(self.__dict__.values())
|
||||||
|
|
||||||
|
@property
|
||||||
|
def items_(self):
|
||||||
|
return self.__dict__.items()
|
||||||
|
|
||||||
|
|
||||||
|
MEDIA_EXTENSIONS = Namespace(
|
||||||
|
common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
|
||||||
|
video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
|
||||||
|
common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
|
||||||
|
audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
|
||||||
|
thumbnails=('jpg', 'png', 'webp'),
|
||||||
|
# storyboards=('mhtml', ),
|
||||||
|
subtitles=('srt', 'vtt', 'ass', 'lrc', 'ttml'),
|
||||||
|
manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
|
||||||
|
)
|
||||||
|
MEDIA_EXTENSIONS.video = MEDIA_EXTENSIONS.common_video + MEDIA_EXTENSIONS.video
|
||||||
|
MEDIA_EXTENSIONS.audio = MEDIA_EXTENSIONS.common_audio + MEDIA_EXTENSIONS.audio
|
||||||
|
|
||||||
|
KNOWN_EXTENSIONS = (
|
||||||
|
MEDIA_EXTENSIONS.video + MEDIA_EXTENSIONS.audio
|
||||||
|
+ MEDIA_EXTENSIONS.manifests
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class _UnsafeExtensionError(Exception):
|
||||||
|
"""
|
||||||
|
Mitigation exception for unwanted file overwrite/path traversal
|
||||||
|
|
||||||
|
Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j
|
||||||
|
"""
|
||||||
|
_ALLOWED_EXTENSIONS = frozenset(itertools.chain(
|
||||||
|
( # internal
|
||||||
|
'description',
|
||||||
|
'json',
|
||||||
|
'meta',
|
||||||
|
'orig',
|
||||||
|
'part',
|
||||||
|
'temp',
|
||||||
|
'uncut',
|
||||||
|
'unknown_video',
|
||||||
|
'ytdl',
|
||||||
|
),
|
||||||
|
# video
|
||||||
|
MEDIA_EXTENSIONS.video, (
|
||||||
|
'asx',
|
||||||
|
'ismv',
|
||||||
|
'm2t',
|
||||||
|
'm2ts',
|
||||||
|
'm2v',
|
||||||
|
'm4s',
|
||||||
|
'mng',
|
||||||
|
'mp2v',
|
||||||
|
'mp4v',
|
||||||
|
'mpe',
|
||||||
|
'mpeg',
|
||||||
|
'mpeg1',
|
||||||
|
'mpeg2',
|
||||||
|
'mpeg4',
|
||||||
|
'mxf',
|
||||||
|
'ogm',
|
||||||
|
'qt',
|
||||||
|
'rm',
|
||||||
|
'swf',
|
||||||
|
'ts',
|
||||||
|
'vob',
|
||||||
|
'vp9',
|
||||||
|
),
|
||||||
|
# audio
|
||||||
|
MEDIA_EXTENSIONS.audio, (
|
||||||
|
'3ga',
|
||||||
|
'ac3',
|
||||||
|
'adts',
|
||||||
|
'aif',
|
||||||
|
'au',
|
||||||
|
'dts',
|
||||||
|
'isma',
|
||||||
|
'it',
|
||||||
|
'mid',
|
||||||
|
'mod',
|
||||||
|
'mpga',
|
||||||
|
'mp1',
|
||||||
|
'mp2',
|
||||||
|
'mp4a',
|
||||||
|
'mpa',
|
||||||
|
'ra',
|
||||||
|
'shn',
|
||||||
|
'xm',
|
||||||
|
),
|
||||||
|
# image
|
||||||
|
MEDIA_EXTENSIONS.thumbnails, (
|
||||||
|
'avif',
|
||||||
|
'bmp',
|
||||||
|
'gif',
|
||||||
|
'ico',
|
||||||
|
'heic',
|
||||||
|
'jng',
|
||||||
|
'jpeg',
|
||||||
|
'jxl',
|
||||||
|
'svg',
|
||||||
|
'tif',
|
||||||
|
'tiff',
|
||||||
|
'wbmp',
|
||||||
|
),
|
||||||
|
# subtitle
|
||||||
|
MEDIA_EXTENSIONS.subtitles, (
|
||||||
|
'dfxp',
|
||||||
|
'fs',
|
||||||
|
'ismt',
|
||||||
|
'json3',
|
||||||
|
'sami',
|
||||||
|
'scc',
|
||||||
|
'srv1',
|
||||||
|
'srv2',
|
||||||
|
'srv3',
|
||||||
|
'ssa',
|
||||||
|
'tt',
|
||||||
|
'xml',
|
||||||
|
),
|
||||||
|
# others
|
||||||
|
MEDIA_EXTENSIONS.manifests,
|
||||||
|
(
|
||||||
|
# not used in yt-dl
|
||||||
|
# *MEDIA_EXTENSIONS.storyboards,
|
||||||
|
# 'desktop',
|
||||||
|
# 'ism',
|
||||||
|
# 'm3u',
|
||||||
|
# 'sbv',
|
||||||
|
# 'swp',
|
||||||
|
# 'url',
|
||||||
|
# 'webloc',
|
||||||
|
)))
|
||||||
|
|
||||||
|
def __init__(self, extension):
|
||||||
|
super(_UnsafeExtensionError, self).__init__('unsafe file extension: {0!r}'.format(extension))
|
||||||
|
self.extension = extension
|
||||||
|
|
||||||
|
# support --no-check-extensions
|
||||||
|
lenient = False
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def sanitize_extension(cls, extension, **kwargs):
|
||||||
|
# ... /, *, prepend=False
|
||||||
|
prepend = kwargs.get('prepend', False)
|
||||||
|
|
||||||
|
if '/' in extension or '\\' in extension:
|
||||||
|
raise cls(extension)
|
||||||
|
|
||||||
|
if not prepend:
|
||||||
|
last = extension.rpartition('.')[-1]
|
||||||
|
if last == 'bin':
|
||||||
|
extension = last = 'unknown_video'
|
||||||
|
if not (cls.lenient or last.lower() in cls._ALLOWED_EXTENSIONS):
|
||||||
|
raise cls(extension)
|
||||||
|
|
||||||
|
return extension
|
||||||
|
Loading…
Reference in New Issue
Block a user