mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-12-23 00:16:48 +00:00
[jsinterp] Fix and improve split/join
* improve split/join edge cases
* correctly implement regex split (not like re.split)
This commit is contained in:
parent
9993e8c5a9
commit
4bf85ca5ba
@ -483,6 +483,13 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
self._test(jsi, 't-e-s-t', args=[test_input, '-'])
|
self._test(jsi, 't-e-s-t', args=[test_input, '-'])
|
||||||
self._test(jsi, '', args=[[], '-'])
|
self._test(jsi, '', args=[[], '-'])
|
||||||
|
|
||||||
|
self._test('function f(){return '
|
||||||
|
'[1, 1.0, "abc", {a: 1}, null, undefined, Infinity, NaN].join()}',
|
||||||
|
'1,1,abc,[object Object],,,Infinity,NaN')
|
||||||
|
self._test('function f(){return '
|
||||||
|
'[1, 1.0, "abc", {a: 1}, null, undefined, Infinity, NaN].join("~")}',
|
||||||
|
'1~1~abc~[object Object]~~~Infinity~NaN')
|
||||||
|
|
||||||
def test_split(self):
|
def test_split(self):
|
||||||
test_result = list('test')
|
test_result = list('test')
|
||||||
tests = [
|
tests = [
|
||||||
@ -496,6 +503,18 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
self._test(jsi, test_result, args=['t-e-s-t', '-'])
|
self._test(jsi, test_result, args=['t-e-s-t', '-'])
|
||||||
self._test(jsi, [''], args=['', '-'])
|
self._test(jsi, [''], args=['', '-'])
|
||||||
self._test(jsi, [], args=['', ''])
|
self._test(jsi, [], args=['', ''])
|
||||||
|
# RegExp split
|
||||||
|
self._test('function f(){return "test".split(/(?:)/)}',
|
||||||
|
['t', 'e', 's', 't'])
|
||||||
|
self._test('function f(){return "t-e-s-t".split(/[es-]+/)}',
|
||||||
|
['t', 't'])
|
||||||
|
# from MDN: surrogate pairs aren't handled: case 1 fails
|
||||||
|
# self._test('function f(){return "😄😄".split(/(?:)/)}',
|
||||||
|
# ['\ud83d', '\ude04', '\ud83d', '\ude04'])
|
||||||
|
# case 2 fails on Py3.2, which gets the case 1 result instead, so skip it there
|
||||||
|
if sys.version_info >= (2, 6) and not ((3, 0) <= sys.version_info < (3, 3)):
|
||||||
|
self._test('function f(){return "😄😄".split(/(?:)/u)}',
|
||||||
|
['😄', '😄'])
|
||||||
|
|
||||||
def test_slice(self):
|
def test_slice(self):
|
||||||
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
|
||||||
|
@ -397,6 +397,9 @@ class JSInterpreter(object):
|
|||||||
RE_FLAGS = {
|
RE_FLAGS = {
|
||||||
# special knowledge: Python's re flags are bitmask values, current max 128
|
# special knowledge: Python's re flags are bitmask values, current max 128
|
||||||
# invent new bitmask values well above that for literal parsing
|
# invent new bitmask values well above that for literal parsing
|
||||||
|
# JS 'u' flag is effectively always set (surrogate pairs aren't seen),
|
||||||
|
# but \u{...} and \p{...} escapes aren't handled; no additional JS 'v'
|
||||||
|
# features are supported
|
||||||
# TODO: execute matches with these flags (remaining: d, y)
|
# TODO: execute matches with these flags (remaining: d, y)
|
||||||
'd': 1024, # Generate indices for substring matches
|
'd': 1024, # Generate indices for substring matches
|
||||||
'g': 2048, # Global search
|
'g': 2048, # Global search
|
||||||
@ -404,6 +407,7 @@ class JSInterpreter(object):
|
|||||||
'm': re.M, # Multi-line search
|
'm': re.M, # Multi-line search
|
||||||
's': re.S, # Allows . to match newline characters
|
's': re.S, # Allows . to match newline characters
|
||||||
'u': re.U, # Treat a pattern as a sequence of unicode code points
|
'u': re.U, # Treat a pattern as a sequence of unicode code points
|
||||||
|
'v': re.U, # Like 'u' with extended character class and \p{} syntax
|
||||||
'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string
|
'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1047,13 +1051,47 @@ class JSInterpreter(object):
|
|||||||
raise self.Exception('Unsupported Math method ' + member, expr=expr)
|
raise self.Exception('Unsupported Math method ' + member, expr=expr)
|
||||||
|
|
||||||
if member == 'split':
|
if member == 'split':
|
||||||
assertion(argvals, 'takes one or more arguments')
|
assertion(len(argvals) <= 2, 'takes at most two arguments')
|
||||||
assertion(len(argvals) == 1, 'with limit argument is not implemented')
|
if len(argvals) > 1:
|
||||||
return obj.split(argvals[0]) if argvals[0] else list(obj)
|
limit = argvals[1]
|
||||||
|
assertion(isinstance(limit, int) and limit >= 0, 'integer limit >= 0')
|
||||||
|
if limit == 0:
|
||||||
|
return []
|
||||||
|
else:
|
||||||
|
limit = 0
|
||||||
|
if len(argvals) == 0:
|
||||||
|
argvals = [JS_Undefined]
|
||||||
|
elif isinstance(argvals[0], self.JS_RegExp):
|
||||||
|
# avoid re.split(), similar but not enough
|
||||||
|
|
||||||
|
def where():
|
||||||
|
for m in argvals[0].finditer(obj):
|
||||||
|
yield m.span(0)
|
||||||
|
yield (None, None)
|
||||||
|
|
||||||
|
def splits(limit=limit):
|
||||||
|
i = 0
|
||||||
|
for j, jj in where():
|
||||||
|
if j == jj == 0:
|
||||||
|
continue
|
||||||
|
if j is None and i >= len(obj):
|
||||||
|
break
|
||||||
|
yield obj[i:j]
|
||||||
|
if jj is None or limit == 1:
|
||||||
|
break
|
||||||
|
limit -= 1
|
||||||
|
i = jj
|
||||||
|
|
||||||
|
return list(splits())
|
||||||
|
return (
|
||||||
|
obj.split(argvals[0], limit - 1) if argvals[0] and argvals[0] != JS_Undefined
|
||||||
|
else list(obj)[:limit or None])
|
||||||
elif member == 'join':
|
elif member == 'join':
|
||||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||||
assertion(len(argvals) == 1, 'takes exactly one argument')
|
assertion(len(argvals) <= 1, 'takes at most one argument')
|
||||||
return argvals[0].join(obj)
|
return (',' if len(argvals) == 0 else argvals[0]).join(
|
||||||
|
('' if x in (None, JS_Undefined) else _js_toString(x))
|
||||||
|
for x in obj)
|
||||||
elif member == 'reverse':
|
elif member == 'reverse':
|
||||||
assertion(not argvals, 'does not take any arguments')
|
assertion(not argvals, 'does not take any arguments')
|
||||||
obj.reverse()
|
obj.reverse()
|
||||||
|
Loading…
Reference in New Issue
Block a user