mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-12-22 07:56:49 +00:00
Compare commits
17 Commits
2f95383b86
...
07f581c526
Author | SHA1 | Date | |
---|---|---|---|
|
07f581c526 | ||
|
d55d1f423d | ||
|
eeafbbc3e5 | ||
|
cd7c7b5edb | ||
|
eed784e15f | ||
|
b4469a0f65 | ||
|
ce1e556b8f | ||
|
f487b4a02a | ||
|
60835ca16c | ||
|
94fd774608 | ||
|
5dee6213ed | ||
|
81e64cacf2 | ||
|
c1a03b1ac3 | ||
|
118c6d7a17 | ||
|
f28d7178e4 | ||
|
525c46942d | ||
|
ce7bb8d0ee |
70
README.md
70
README.md
@ -1,29 +1,29 @@
|
||||
[![Build Status](https://github.com/ytdl-org/youtube-dl/workflows/CI/badge.svg)](https://github.com/ytdl-org/youtube-dl/actions?query=workflow%3ACI)
|
||||
|
||||
|
||||
youtube-dl - download videos from youtube.com or other video platforms
|
||||
youtube-dl - download videos from youtube.com or other video platforms.
|
||||
|
||||
- [INSTALLATION](#installation)
|
||||
- [DESCRIPTION](#description)
|
||||
- [OPTIONS](#options)
|
||||
- [CONFIGURATION](#configuration)
|
||||
- [OUTPUT TEMPLATE](#output-template)
|
||||
- [FORMAT SELECTION](#format-selection)
|
||||
- [VIDEO SELECTION](#video-selection)
|
||||
- [Installation](#installation)
|
||||
- [Description](#description)
|
||||
- [Options](#options)
|
||||
- [Configuration](#configuration)
|
||||
- [Output template](#output-template)
|
||||
- [Format selection](#format-selection)
|
||||
- [Video selection](#video-selection)
|
||||
- [FAQ](#faq)
|
||||
- [DEVELOPER INSTRUCTIONS](#developer-instructions)
|
||||
- [EMBEDDING YOUTUBE-DL](#embedding-youtube-dl)
|
||||
- [BUGS](#bugs)
|
||||
- [COPYRIGHT](#copyright)
|
||||
- [Developer instructions](#developer-instructions)
|
||||
- [Embedding youtube-dl](#embedding-youtube-dl)
|
||||
- [Bugs](#bugs)
|
||||
- [Copyright](#copyright)
|
||||
|
||||
# INSTALLATION
|
||||
|
||||
To install it right away for all UNIX users (Linux, macOS, etc.), type:
|
||||
To install it right away for all Unix users (Linux, macOS, etc.), type:
|
||||
|
||||
sudo curl -L https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||
|
||||
If you do not have curl, you can alternatively use a recent wget:
|
||||
If you do not have curl, you can alternatively use wget:
|
||||
|
||||
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||
@ -44,10 +44,10 @@ Or with [MacPorts](https://www.macports.org/):
|
||||
|
||||
sudo port install youtube-dl
|
||||
|
||||
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://ytdl-org.github.io/youtube-dl/download.html).
|
||||
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the Git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://ytdl-org.github.io/youtube-dl/download.html).
|
||||
|
||||
# DESCRIPTION
|
||||
**youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
|
||||
**youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix computer, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
youtube-dl [OPTIONS] URL [URL...]
|
||||
|
||||
@ -97,7 +97,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
only)
|
||||
--no-color Do not emit color codes in output
|
||||
|
||||
## Network Options:
|
||||
## Network options
|
||||
--proxy URL Use the specified HTTP/HTTPS/SOCKS
|
||||
proxy. To enable SOCKS proxy, specify a
|
||||
proper scheme. For example
|
||||
@ -110,7 +110,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
-4, --force-ipv4 Make all connections via IPv4
|
||||
-6, --force-ipv6 Make all connections via IPv6
|
||||
|
||||
## Geo Restriction:
|
||||
## Geo restriction
|
||||
--geo-verification-proxy URL Use this proxy to verify the IP address
|
||||
for some geo-restricted sites. The
|
||||
default proxy specified by --proxy (or
|
||||
@ -127,7 +127,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
with explicitly provided IP block in
|
||||
CIDR notation
|
||||
|
||||
## Video Selection:
|
||||
## Video selection
|
||||
--playlist-start NUMBER Playlist video to start at (default is
|
||||
1)
|
||||
--playlist-end NUMBER Playlist video to end at (default is
|
||||
@ -193,7 +193,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--include-ads Download advertisements as well
|
||||
(experimental)
|
||||
|
||||
## Download Options:
|
||||
## Download options
|
||||
-r, --limit-rate RATE Maximum download rate in bytes per
|
||||
second (e.g. 50K or 4.2M)
|
||||
-R, --retries RETRIES Number of retries (default is 10), or
|
||||
@ -239,7 +239,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--external-downloader-args ARGS Give these arguments to the external
|
||||
downloader
|
||||
|
||||
## Filesystem Options:
|
||||
## Filesystem options
|
||||
-a, --batch-file FILE File containing URLs to download ('-'
|
||||
for stdin), one URL per line. Lines
|
||||
starting with '#', ';' or ']' are
|
||||
@ -287,14 +287,14 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--rm-cache-dir Delete all filesystem cache files
|
||||
|
||||
## Thumbnail Options:
|
||||
## Thumbnail options
|
||||
--write-thumbnail Write thumbnail image to disk
|
||||
--write-all-thumbnails Write all thumbnail image formats to
|
||||
disk
|
||||
--list-thumbnails Simulate and list all available
|
||||
thumbnail formats
|
||||
|
||||
## Verbosity / Simulation Options:
|
||||
## Verbosity / simulation options
|
||||
-q, --quiet Activate quiet mode
|
||||
--no-warnings Ignore warnings
|
||||
-s, --simulate Do not download the video and do not
|
||||
@ -336,7 +336,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--no-call-home Do NOT contact the youtube-dl server
|
||||
for debugging
|
||||
|
||||
## Workarounds:
|
||||
## Workarounds
|
||||
--encoding ENCODING Force the specified encoding
|
||||
(experimental)
|
||||
--no-check-certificate Suppress HTTPS certificate validation
|
||||
@ -365,7 +365,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
Must only be used along with --min-
|
||||
sleep-interval.
|
||||
|
||||
## Video Format Options:
|
||||
## Video format options
|
||||
-f, --format FORMAT Video format code, see the "FORMAT
|
||||
SELECTION" for all the info
|
||||
--all-formats Download all available video formats
|
||||
@ -381,7 +381,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
webm, flv. Ignored if no merge is
|
||||
required
|
||||
|
||||
## Subtitle Options:
|
||||
## Subtitle options
|
||||
--write-sub Write subtitle file
|
||||
--write-auto-sub Write automatically generated subtitle
|
||||
file (YouTube only)
|
||||
@ -396,7 +396,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
(optional) separated by commas, use
|
||||
--list-subs for available language tags
|
||||
|
||||
## Authentication Options:
|
||||
## Authentication options
|
||||
-u, --username USERNAME Login with this account ID
|
||||
-p, --password PASSWORD Account password. If this option is
|
||||
left out, youtube-dl will ask
|
||||
@ -405,7 +405,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
-n, --netrc Use .netrc authentication data
|
||||
--video-password PASSWORD Video password (vimeo, youku)
|
||||
|
||||
## Adobe Pass Options:
|
||||
## Adobe Pass options
|
||||
--ap-mso MSO Adobe Pass multiple-system operator (TV
|
||||
provider) identifier, use --ap-list-mso
|
||||
for a list of available MSOs
|
||||
@ -416,7 +416,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--ap-list-mso List all supported multiple-system
|
||||
operators
|
||||
|
||||
## Post-processing Options:
|
||||
## Post-processing options
|
||||
-x, --extract-audio Convert video files to audio-only files
|
||||
(requires ffmpeg/avconv and
|
||||
ffprobe/avprobe)
|
||||
@ -480,7 +480,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and macOS, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
|
||||
You can configure youtube-dl by placing any supported command-line option in a configuration file. On Linux and macOS, the system-wide configuration file is located at `/etc/youtube-dl.conf` and the user-wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user-wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default the configuration file may not exist, so you may need to create it yourself.
|
||||
|
||||
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
|
||||
```
|
||||
@ -499,7 +499,7 @@ For example, with the following configuration file youtube-dl will always extrac
|
||||
-o ~/Movies/%(title)s.%(ext)s
|
||||
```
|
||||
|
||||
Note that options in configuration file are just the same options aka switches used in regular command line calls thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`.
|
||||
Note that options in a configuration file are just the same options (aka switches) used in regular command-line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`.
|
||||
|
||||
You can use `--ignore-config` if you want to disable the configuration file for a particular youtube-dl run.
|
||||
|
||||
@ -507,7 +507,7 @@ You can also use `--config-location` if you want to use custom configuration fil
|
||||
|
||||
### Authentication with `.netrc` file
|
||||
|
||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
|
||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`), so that you do not have to pass credentials as command-line arguments on every youtube-dl execution, and to keep plain-text passwords out of the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per-extractor basis. For that, you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
|
||||
```
|
||||
touch $HOME/.netrc
|
||||
chmod a-rwx,u+rw $HOME/.netrc
|
||||
@ -534,7 +534,7 @@ The `-o` option allows users to indicate a template for the output file names.
|
||||
|
||||
**tl;dr:** [navigate me to examples](#output-template-examples).
|
||||
|
||||
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Allowed names along with sequence type are:
|
||||
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Allowed names along with sequence type are:
|
||||
|
||||
- `id` (string): Video identifier
|
||||
- `title` (string): Video title
|
||||
@ -640,7 +640,7 @@ If you are using an output template inside a Windows batch file then you must es
|
||||
|
||||
#### Output template examples
|
||||
|
||||
Note that on Windows you may need to use double quotes instead of single.
|
||||
Note that on Windows you may need to use double quotes instead of single quotes.
|
||||
|
||||
```bash
|
||||
$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
|
||||
@ -733,7 +733,7 @@ If you want to preserve the old format selection behavior (prior to youtube-dl 2
|
||||
|
||||
#### Format selection examples
|
||||
|
||||
Note that on Windows you may need to use double quotes instead of single.
|
||||
Note that on Windows you may need to use double quotes instead of single quotes.
|
||||
|
||||
```bash
|
||||
# Download best mp4 format available or any other best if no mp4 available
|
||||
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
@ -11,7 +12,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
import math
|
||||
import re
|
||||
|
||||
from youtube_dl.compat import compat_str
|
||||
from youtube_dl.compat import compat_str as str
|
||||
from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
|
||||
|
||||
NaN = object()
|
||||
@ -19,7 +20,7 @@ NaN = object()
|
||||
|
||||
class TestJSInterpreter(unittest.TestCase):
|
||||
def _test(self, jsi_or_code, expected, func='f', args=()):
|
||||
if isinstance(jsi_or_code, compat_str):
|
||||
if isinstance(jsi_or_code, str):
|
||||
jsi_or_code = JSInterpreter(jsi_or_code)
|
||||
got = jsi_or_code.call_function(func, *args)
|
||||
if expected is NaN:
|
||||
@ -40,16 +41,27 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f(){return 42 + 7;}', 49)
|
||||
self._test('function f(){return 42 + undefined;}', NaN)
|
||||
self._test('function f(){return 42 + null;}', 42)
|
||||
self._test('function f(){return 1 + "";}', '1')
|
||||
self._test('function f(){return 42 + "7";}', '427')
|
||||
self._test('function f(){return false + true;}', 1)
|
||||
self._test('function f(){return "false" + true;}', 'falsetrue')
|
||||
self._test('function f(){return '
|
||||
'1 + "2" + [3,4] + {k: 56} + null + undefined + Infinity;}',
|
||||
'123,4[object Object]nullundefinedInfinity')
|
||||
|
||||
def test_sub(self):
|
||||
self._test('function f(){return 42 - 7;}', 35)
|
||||
self._test('function f(){return 42 - undefined;}', NaN)
|
||||
self._test('function f(){return 42 - null;}', 42)
|
||||
self._test('function f(){return 42 - "7";}', 35)
|
||||
self._test('function f(){return 42 - "spam";}', NaN)
|
||||
|
||||
def test_mul(self):
|
||||
self._test('function f(){return 42 * 7;}', 294)
|
||||
self._test('function f(){return 42 * undefined;}', NaN)
|
||||
self._test('function f(){return 42 * null;}', 0)
|
||||
self._test('function f(){return 42 * "7";}', 294)
|
||||
self._test('function f(){return 42 * "eggs";}', NaN)
|
||||
|
||||
def test_div(self):
|
||||
jsi = JSInterpreter('function f(a, b){return a / b;}')
|
||||
@ -57,17 +69,26 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test(jsi, NaN, args=(JS_Undefined, 1))
|
||||
self._test(jsi, float('inf'), args=(2, 0))
|
||||
self._test(jsi, 0, args=(0, 3))
|
||||
self._test(jsi, 6, args=(42, 7))
|
||||
self._test(jsi, 0, args=(42, float('inf')))
|
||||
self._test(jsi, 6, args=("42", 7))
|
||||
self._test(jsi, NaN, args=("spam", 7))
|
||||
|
||||
def test_mod(self):
|
||||
self._test('function f(){return 42 % 7;}', 0)
|
||||
self._test('function f(){return 42 % 0;}', NaN)
|
||||
self._test('function f(){return 42 % undefined;}', NaN)
|
||||
self._test('function f(){return 42 % "7";}', 0)
|
||||
self._test('function f(){return 42 % "beans";}', NaN)
|
||||
|
||||
def test_exp(self):
|
||||
self._test('function f(){return 42 ** 2;}', 1764)
|
||||
self._test('function f(){return 42 ** undefined;}', NaN)
|
||||
self._test('function f(){return 42 ** null;}', 1)
|
||||
self._test('function f(){return undefined ** 0;}', 1)
|
||||
self._test('function f(){return undefined ** 42;}', NaN)
|
||||
self._test('function f(){return 42 ** "2";}', 1764)
|
||||
self._test('function f(){return 42 ** "spam";}', NaN)
|
||||
|
||||
def test_calc(self):
|
||||
self._test('function f(a){return 2*a+1;}', 7, args=[3])
|
||||
@ -89,7 +110,35 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f(){return 19 & 21;}', 17)
|
||||
self._test('function f(){return 11 >> 2;}', 2)
|
||||
self._test('function f(){return []? 2+3: 4;}', 5)
|
||||
# equality
|
||||
self._test('function f(){return 1 == 1}', True)
|
||||
self._test('function f(){return 1 == 1.0}', True)
|
||||
self._test('function f(){return 1 == "1"}', True)
|
||||
self._test('function f(){return 1 == 2}', False)
|
||||
self._test('function f(){return 1 != "1"}', False)
|
||||
self._test('function f(){return 1 != 2}', True)
|
||||
self._test('function f(){var x = {a: 1}; var y = x; return x == y}', True)
|
||||
self._test('function f(){var x = {a: 1}; return x == {a: 1}}', False)
|
||||
self._test('function f(){return NaN == NaN}', False)
|
||||
self._test('function f(){return null == undefined}', True)
|
||||
self._test('function f(){return "spam, eggs" == "spam, eggs"}', True)
|
||||
# strict equality
|
||||
self._test('function f(){return 1 === 1}', True)
|
||||
self._test('function f(){return 1 === 1.0}', True)
|
||||
self._test('function f(){return 1 === "1"}', False)
|
||||
self._test('function f(){return 1 === 2}', False)
|
||||
self._test('function f(){var x = {a: 1}; var y = x; return x === y}', True)
|
||||
self._test('function f(){var x = {a: 1}; return x === {a: 1}}', False)
|
||||
self._test('function f(){return NaN === NaN}', False)
|
||||
self._test('function f(){return null === undefined}', False)
|
||||
self._test('function f(){return null === null}', True)
|
||||
self._test('function f(){return undefined === undefined}', True)
|
||||
self._test('function f(){return "uninterned" === "uninterned"}', True)
|
||||
self._test('function f(){return 1 === 1}', True)
|
||||
self._test('function f(){return 1 === "1"}', False)
|
||||
self._test('function f(){return 1 !== 1}', False)
|
||||
self._test('function f(){return 1 !== "1"}', True)
|
||||
# expressions
|
||||
self._test('function f(){return 0 && 1 || 2;}', 2)
|
||||
self._test('function f(){return 0 ?? 42;}', 0)
|
||||
self._test('function f(){return "life, the universe and everything" < 42;}', False)
|
||||
@ -111,7 +160,6 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51)
|
||||
self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11)
|
||||
|
||||
@unittest.skip('Not yet fully implemented')
|
||||
def test_comments(self):
|
||||
self._test('''
|
||||
function f() {
|
||||
@ -130,6 +178,15 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
}
|
||||
''', 3)
|
||||
|
||||
self._test('''
|
||||
function f() {
|
||||
var x = ( /* 1 + */ 2 +
|
||||
/* 30 * 40 */
|
||||
50);
|
||||
return x;
|
||||
}
|
||||
''', 52)
|
||||
|
||||
def test_precedence(self):
|
||||
self._test('''
|
||||
function f() {
|
||||
@ -266,7 +323,20 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5)
|
||||
|
||||
def test_void(self):
|
||||
self._test('function f() { return void 42; }', None)
|
||||
self._test('function f() { return void 42; }', JS_Undefined)
|
||||
|
||||
def test_typeof(self):
|
||||
self._test('function f() { return typeof undefined; }', 'undefined')
|
||||
self._test('function f() { return typeof NaN; }', 'number')
|
||||
self._test('function f() { return typeof Infinity; }', 'number')
|
||||
self._test('function f() { return typeof true; }', 'boolean')
|
||||
self._test('function f() { return typeof null; }', 'object')
|
||||
self._test('function f() { return typeof "a string"; }', 'string')
|
||||
self._test('function f() { return typeof 42; }', 'number')
|
||||
self._test('function f() { return typeof 42.42; }', 'number')
|
||||
self._test('function f() { var g = function(){}; return typeof g; }', 'function')
|
||||
self._test('function f() { return typeof {key: "value"}; }', 'object')
|
||||
# not yet implemented: Symbol, BigInt
|
||||
|
||||
def test_return_function(self):
|
||||
jsi = JSInterpreter('''
|
||||
@ -324,6 +394,16 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f() { let a; return a?.qq; }', JS_Undefined)
|
||||
self._test('function f() { let a = {m1: 42, m2: 0 }; return a?.qq; }', JS_Undefined)
|
||||
|
||||
def test_indexing(self):
|
||||
self._test('function f() { return [1, 2, 3, 4][3]}', 4)
|
||||
self._test('function f() { return [1, [2, [3, [4]]]][1][1][1][0]}', 4)
|
||||
self._test('function f() { var o = {1: 2, 3: 4}; return o[3]}', 4)
|
||||
self._test('function f() { var o = {1: 2, 3: 4}; return o["3"]}', 4)
|
||||
self._test('function f() { return [1, [2, {3: [4]}]][1][1]["3"][0]}', 4)
|
||||
self._test('function f() { return [1, 2, 3, 4].length}', 4)
|
||||
self._test('function f() { var o = {1: 2, 3: 4}; return o.length}', JS_Undefined)
|
||||
self._test('function f() { var o = {1: 2, 3: 4}; o["length"] = 42; return o.length}', 42)
|
||||
|
||||
def test_regex(self):
|
||||
self._test('function f() { let a=/,,[/,913,/](,)}/; }', None)
|
||||
|
||||
@ -411,6 +491,13 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test(jsi, 't-e-s-t', args=[test_input, '-'])
|
||||
self._test(jsi, '', args=[[], '-'])
|
||||
|
||||
self._test('function f(){return '
|
||||
'[1, 1.0, "abc", {a: 1}, null, undefined, Infinity, NaN].join()}',
|
||||
'1,1,abc,[object Object],,,Infinity,NaN')
|
||||
self._test('function f(){return '
|
||||
'[1, 1.0, "abc", {a: 1}, null, undefined, Infinity, NaN].join("~")}',
|
||||
'1~1~abc~[object Object]~~~Infinity~NaN')
|
||||
|
||||
def test_split(self):
|
||||
test_result = list('test')
|
||||
tests = [
|
||||
@ -424,6 +511,18 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test(jsi, test_result, args=['t-e-s-t', '-'])
|
||||
self._test(jsi, [''], args=['', '-'])
|
||||
self._test(jsi, [], args=['', ''])
|
||||
# RegExp split
|
||||
self._test('function f(){return "test".split(/(?:)/)}',
|
||||
['t', 'e', 's', 't'])
|
||||
self._test('function f(){return "t-e-s-t".split(/[es-]+/)}',
|
||||
['t', 't'])
|
||||
# from MDN: surrogate pairs aren't handled: case 1 fails
|
||||
# self._test('function f(){return "😄😄".split(/(?:)/)}',
|
||||
# ['\ud83d', '\ude04', '\ud83d', '\ude04'])
|
||||
# case 2 beats Py3.2: it gets the case 1 result
|
||||
if sys.version_info >= (2, 6) and not ((3, 0) <= sys.version_info < (3, 3)):
|
||||
self._test('function f(){return "😄😄".split(/(?:)/u)}',
|
||||
['😄', '😄'])
|
||||
|
||||
def test_slice(self):
|
||||
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
|
||||
@ -453,6 +552,40 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f(){return "012345678".slice(-1, 1)}', '')
|
||||
self._test('function f(){return "012345678".slice(-3, -1)}', '67')
|
||||
|
||||
def test_pop(self):
|
||||
# pop
|
||||
self._test('function f(){var a = [0, 1, 2, 3, 4, 5, 6, 7, 8]; return [a.pop(), a]}',
|
||||
[8, [0, 1, 2, 3, 4, 5, 6, 7]])
|
||||
self._test('function f(){return [].pop()}', JS_Undefined)
|
||||
# push
|
||||
self._test('function f(){var a = [0, 1, 2]; return [a.push(3, 4), a]}',
|
||||
[5, [0, 1, 2, 3, 4]])
|
||||
self._test('function f(){var a = [0, 1, 2]; return [a.push(), a]}',
|
||||
[3, [0, 1, 2]])
|
||||
|
||||
def test_shift(self):
|
||||
# shift
|
||||
self._test('function f(){var a = [0, 1, 2, 3, 4, 5, 6, 7, 8]; return [a.shift(), a]}',
|
||||
[0, [1, 2, 3, 4, 5, 6, 7, 8]])
|
||||
self._test('function f(){return [].shift()}', JS_Undefined)
|
||||
# unshift
|
||||
self._test('function f(){var a = [0, 1, 2]; return [a.unshift(3, 4), a]}',
|
||||
[5, [3, 4, 0, 1, 2]])
|
||||
self._test('function f(){var a = [0, 1, 2]; return [a.unshift(), a]}',
|
||||
[3, [0, 1, 2]])
|
||||
|
||||
def test_forEach(self):
|
||||
self._test('function f(){var ret = []; var l = [4, 2]; '
|
||||
'var log = function(e,i,a){ret.push([e,i,a]);}; '
|
||||
'l.forEach(log); '
|
||||
'return [ret.length, ret[0][0], ret[1][1], ret[0][2]]}',
|
||||
[2, 4, 1, [4, 2]])
|
||||
self._test('function f(){var ret = []; var l = [4, 2]; '
|
||||
'var log = function(e,i,a){this.push([e,i,a]);}; '
|
||||
'l.forEach(log, ret); '
|
||||
'return [ret.length, ret[0][0], ret[1][1], ret[0][2]]}',
|
||||
[2, 4, 1, [4, 2]])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
@ -12,6 +13,7 @@ import re
|
||||
import string
|
||||
|
||||
from youtube_dl.compat import (
|
||||
compat_contextlib_suppress,
|
||||
compat_open as open,
|
||||
compat_str,
|
||||
compat_urlretrieve,
|
||||
@ -50,23 +52,38 @@ _SIG_TESTS = [
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
|
||||
84,
|
||||
'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
|
||||
'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
|
||||
83,
|
||||
'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
|
||||
'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
|
||||
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
|
||||
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
|
||||
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
|
||||
'312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
|
||||
'112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
|
||||
)
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'MyOSJXtKI3m-uME_jv7-pT12gOFC02RFkGoqWpzE0Cs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xxAj7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJ2OySqa0q',
|
||||
),
|
||||
]
|
||||
|
||||
_NSIG_TESTS = [
|
||||
@ -142,6 +159,10 @@ _NSIG_TESTS = [
|
||||
'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
|
||||
'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/7a062b77/player_ias.vflset/en_US/base.js',
|
||||
'NRcE3y3mVtm_cV-W', 'VbsCYUATvqlt5w',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
|
||||
'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
|
||||
@ -154,6 +175,10 @@ _NSIG_TESTS = [
|
||||
'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
|
||||
'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
|
||||
'1wWCVpRR96eAmMI87L', 'KSkWAVv1ZQxC3A',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
|
||||
'_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
|
||||
@ -182,6 +207,18 @@ _NSIG_TESTS = [
|
||||
'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
|
||||
'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
|
||||
'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/f8f53e1a/player_ias.vflset/en_US/base.js',
|
||||
'VTQOUOv0mCIeJ7i8kZB', 'kcfD8wy0sNLyNQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
|
||||
'YWt1qdbe8SAfkoPHW5d', 'RrRjWQOJmBiP',
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@ -216,11 +253,9 @@ class TestSignature(unittest.TestCase):
|
||||
os.mkdir(self.TESTDATA_DIR)
|
||||
|
||||
def tearDown(self):
|
||||
try:
|
||||
with compat_contextlib_suppress(OSError):
|
||||
for f in os.listdir(self.TESTDATA_DIR):
|
||||
os.remove(f)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def t_factory(name, sig_func, url_pattern):
|
||||
@ -254,11 +289,12 @@ def signature(jscode, sig_input):
|
||||
|
||||
def n_sig(jscode, sig_input):
|
||||
funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
|
||||
return JSInterpreter(jscode).call_function(funcname, sig_input)
|
||||
return JSInterpreter(jscode).call_function(
|
||||
funcname, sig_input, _ytdl_do_not_return=sig_input)
|
||||
|
||||
|
||||
make_sig_test = t_factory(
|
||||
'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
|
||||
'signature', signature, re.compile(r'.*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$'))
|
||||
for test_spec in _SIG_TESTS:
|
||||
make_sig_test(*test_spec)
|
||||
|
||||
|
@ -3170,7 +3170,7 @@ class InfoExtractor(object):
|
||||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||
# of jwplayer.flash.swf
|
||||
rtmp_url_parts = re.split(
|
||||
r'((?:mp4|mp3|flv):)', source_url, 1)
|
||||
r'((?:mp4|mp3|flv):)', source_url, maxsplit=1)
|
||||
if len(rtmp_url_parts) == 3:
|
||||
rtmp_url, prefix, play_path = rtmp_url_parts
|
||||
a_format.update({
|
||||
|
@ -3,11 +3,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import collections
|
||||
import hashlib
|
||||
import itertools
|
||||
import json
|
||||
import os.path
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
import traceback
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
@ -290,6 +292,33 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
||||
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
|
||||
|
||||
_SAPISID = None
|
||||
|
||||
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
|
||||
time_now = round(time.time())
|
||||
if self._SAPISID is None:
|
||||
yt_cookies = self._get_cookies('https://www.youtube.com')
|
||||
# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/393
|
||||
sapisid_cookie = dict_get(
|
||||
yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
|
||||
if sapisid_cookie and sapisid_cookie.value:
|
||||
self._SAPISID = sapisid_cookie.value
|
||||
self.write_debug('Extracted SAPISID cookie')
|
||||
# SAPISID cookie is required if not already present
|
||||
if not yt_cookies.get('SAPISID'):
|
||||
self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
|
||||
self._set_cookie(
|
||||
'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
|
||||
else:
|
||||
self._SAPISID = False
|
||||
if not self._SAPISID:
|
||||
return None
|
||||
# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
|
||||
sapisidhash = hashlib.sha1(
|
||||
'{0} {1} {2}'.format(time_now, self._SAPISID, origin).encode('utf-8')).hexdigest()
|
||||
return 'SAPISIDHASH {0}_{1}'.format(time_now, sapisidhash)
|
||||
|
||||
def _call_api(self, ep, query, video_id, fatal=True, headers=None):
|
||||
data = self._DEFAULT_API_DATA.copy()
|
||||
data.update(query)
|
||||
@ -1579,20 +1608,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
self.to_screen('Extracted signature function:\n' + code)
|
||||
|
||||
def _parse_sig_js(self, jscode):
|
||||
# Examples where `sig` is funcname:
|
||||
# sig=function(a){a=a.split(""); ... ;return a.join("")};
|
||||
# ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
|
||||
# {var l=f,m=h.sp,n=sig(decodeURIComponent(h.s));l.set(m,encodeURIComponent(n))}
|
||||
# sig=function(J){J=J.split(""); ... ;return J.join("")};
|
||||
# ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J};
|
||||
# {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))}
|
||||
funcname = self._search_regex(
|
||||
(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
|
||||
r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
|
||||
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
|
||||
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
||||
(r'\b(?P<var>[\w$]+)&&\((?P=var)=(?P<sig>[\w$]{2,})\(decodeURIComponent\((?P=var)\)\)',
|
||||
r'(?P<sig>[\w$]+)\s*=\s*function\(\s*(?P<arg>[\w$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)',
|
||||
r'(?:\b|[^\w$])(?P<sig>[\w$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[\w$]{2}\.[\w$]{2}\(a,\d+\))?',
|
||||
# Old patterns
|
||||
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[\w$]+)\(',
|
||||
r'\b[\w]+\s*&&\s*[\w]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[\w$]+)\(',
|
||||
r'\bm=(?P<sig>[\w$]{2,})\(decodeURIComponent\(h\.s\)\)',
|
||||
# Obsolete patterns
|
||||
r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
|
||||
r'("|\')signature\1\s*,\s*(?P<sig>[\w$]+)\(',
|
||||
r'\.sig\|\|(?P<sig>[\w$]+)\(',
|
||||
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[\w$]+)\(',
|
||||
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[\w$]+)\(',
|
||||
r'\bc\s*&&\s*[\w]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[\w$]+)\('),
|
||||
jscode, 'Initial JS player signature function name', group='sig')
|
||||
|
||||
jsi = JSInterpreter(jscode)
|
||||
@ -1658,36 +1694,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def _extract_n_function_name(self, jscode):
|
||||
func_name, idx = self._search_regex(
|
||||
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
|
||||
# or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
|
||||
# or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
|
||||
# (y=NuD(),Mw(k),q=k.Z[y]||null)&&(q=narray[idx](q),k.set(y,q),k.V||NuD(''))}};
|
||||
# (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
|
||||
# or: (b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
|
||||
# or: (b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c)
|
||||
# or: (PL(a),b=a.j.n||null)&&(b=narray[idx](b)
|
||||
# or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
|
||||
# old: (b=a.get("n"))&&(b=nfunc[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s*
|
||||
# old: (b=a.get("n"))&&(b=narray[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s*
|
||||
# older: (b=a.get("n"))&&(b=nfunc(b)
|
||||
r'''(?x)
|
||||
\((?:[\w$()\s]+,)*?\s* # (
|
||||
(?P<b>[a-z])\s*=\s* # b=
|
||||
(?:
|
||||
(?: # expect ,c=a.get(b) (etc)
|
||||
String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
|
||||
"n+"\[\s*\+?s*[\w$.]+\s*]
|
||||
)\s*(?:,[\w$()\s]+(?=,))*|
|
||||
(?P<old>[\w$]+) # a (old[er])
|
||||
)\s*
|
||||
(?(old)
|
||||
# b.get("n")
|
||||
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
|
||||
(?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
|
||||
| # ,c=a.get(b)
|
||||
,\s*(?P<c>[a-z])\s*=\s*[a-z]\s*
|
||||
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
|
||||
(?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
|
||||
)
|
||||
# interstitial junk
|
||||
\s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)?
|
||||
(?(c)(?P=c)|(?P=b))\s*=\s* # [c|b]=
|
||||
# nfunc|nfunc[idx]
|
||||
(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
|
||||
# (expr, ...,
|
||||
\((?:(?:\s*[\w$]+\s*=)?(?:[\w$"+\.\s(\[]+(?:[)\]]\s*)?),)*
|
||||
# b=...
|
||||
(?P<b>[\w$]+)\s*=\s*(?!(?P=b)[^\w$])[\w$]+\s*(?:(?:
|
||||
\.\s*[\w$]+ |
|
||||
\[\s*[\w$]+\s*\] |
|
||||
\.\s*get\s*\(\s*[\w$"]+\s*\)
|
||||
)\s*){,2}(?:\s*\|\|\s*null(?=\s*\)))?\s*
|
||||
\)\s*&&\s*\( # ...)&&(
|
||||
# b = nfunc, b = narray[idx]
|
||||
(?P=b)\s*=\s*(?P<nfunc>[\w$]+)\s*
|
||||
(?:\[\s*(?P<idx>[\w$]+)\s*\]\s*)?
|
||||
# (...)
|
||||
\(\s*[\w$]+\s*\)
|
||||
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
|
||||
default=(None, None))
|
||||
# thx bashonly: yt-dlp/yt-dlp/pull/10611
|
||||
@ -1697,15 +1726,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
r'''(?xs)
|
||||
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
|
||||
(?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
|
||||
\s*\{(?:(?!};).)+?["']enhanced_except_
|
||||
\s*\{(?:(?!};).)+?(?:
|
||||
["']enhanced_except_ |
|
||||
return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[\w$]+
|
||||
)
|
||||
''', jscode, 'Initial JS player n function name', group='name')
|
||||
if not idx:
|
||||
return func_name
|
||||
|
||||
return self._parse_json(self._search_regex(
|
||||
r'var\s+{0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode,
|
||||
'Initial JS player n function list ({0}.{1})'.format(func_name, idx)),
|
||||
func_name, transform_source=js_to_json)[int(idx)]
|
||||
return self._search_json(
|
||||
r'var\s+{0}\s*='.format(re.escape(func_name)), jscode,
|
||||
'Initial JS player n function list ({0}.{1})'.format(func_name, idx),
|
||||
func_name, contains_pattern=r'\[[\s\S]+\]', end_pattern='[,;]',
|
||||
transform_source=js_to_json)[int(idx)]
|
||||
|
||||
def _extract_n_function_code(self, video_id, player_url):
|
||||
player_id = self._extract_player_info(player_url)
|
||||
@ -1728,13 +1761,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def extract_nsig(s):
|
||||
try:
|
||||
ret = func([s])
|
||||
ret = func([s], kwargs={'_ytdl_do_not_return': s})
|
||||
except JSInterpreter.Exception:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
|
||||
|
||||
if ret.startswith('enhanced_except_'):
|
||||
if ret.startswith('enhanced_except_') or ret.endswith(s):
|
||||
raise JSInterpreter.Exception('Signature function returned an exception')
|
||||
return ret
|
||||
|
||||
@ -1910,9 +1943,50 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
player_response = self._extract_yt_initial_variable(
|
||||
webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
|
||||
video_id, 'initial player response')
|
||||
if not player_response:
|
||||
if False and not player_response:
|
||||
player_response = self._call_api(
|
||||
'player', {'videoId': video_id}, video_id)
|
||||
if True or not player_response:
|
||||
origin = 'https://www.youtube.com'
|
||||
pb_context = {'html5Preference': 'HTML5_PREF_WANTS'}
|
||||
|
||||
player_url = self._extract_player_url(webpage)
|
||||
ytcfg = self._extract_ytcfg(video_id, webpage)
|
||||
sts = self._extract_signature_timestamp(video_id, player_url, ytcfg)
|
||||
if sts:
|
||||
pb_context['signatureTimestamp'] = sts
|
||||
|
||||
query = {
|
||||
'playbackContext': {
|
||||
'contentPlaybackContext': pb_context,
|
||||
'contentCheckOk': True,
|
||||
'racyCheckOk': True,
|
||||
},
|
||||
'context': {
|
||||
'client': {
|
||||
'clientName': 'MWEB',
|
||||
'clientVersion': '2.20241202.07.00',
|
||||
'hl': 'en',
|
||||
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
|
||||
'timeZone': 'UTC',
|
||||
'utcOffsetMinutes': 0,
|
||||
},
|
||||
},
|
||||
'videoId': video_id,
|
||||
}
|
||||
headers = {
|
||||
'X-YouTube-Client-Name': '2',
|
||||
'X-YouTube-Client-Version': '2.20241202.07.00',
|
||||
'Origin': origin,
|
||||
'Sec-Fetch-Mode': 'navigate',
|
||||
'User-Agent': query['context']['client']['userAgent'],
|
||||
}
|
||||
auth = self._generate_sapisidhash_header(origin)
|
||||
if auth is not None:
|
||||
headers['Authorization'] = auth
|
||||
headers['X-Origin'] = origin
|
||||
|
||||
player_response = self._call_api('player', query, video_id, fatal=False, headers=headers)
|
||||
|
||||
def is_agegated(playability):
|
||||
if not isinstance(playability, dict):
|
||||
@ -2219,12 +2293,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
formats.append(f)
|
||||
|
||||
playable_formats = [f for f in formats if not f.get('has_drm')]
|
||||
if formats and not playable_formats:
|
||||
if formats:
|
||||
if not playable_formats:
|
||||
# If there are no formats that definitely don't have DRM, all have DRM
|
||||
self.report_drm(video_id)
|
||||
formats[:] = playable_formats
|
||||
|
||||
if not formats:
|
||||
else:
|
||||
if streaming_data.get('licenseInfos'):
|
||||
raise ExtractorError(
|
||||
'This video is DRM protected.', expected=True)
|
||||
|
@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
@ -5,11 +6,12 @@ import json
|
||||
import operator
|
||||
import re
|
||||
|
||||
from functools import update_wrapper
|
||||
from functools import update_wrapper, wraps
|
||||
|
||||
from .utils import (
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
js_to_json,
|
||||
remove_quotes,
|
||||
unified_timestamp,
|
||||
@ -20,9 +22,11 @@ from .compat import (
|
||||
compat_basestring,
|
||||
compat_chr,
|
||||
compat_collections_chain_map as ChainMap,
|
||||
compat_contextlib_suppress,
|
||||
compat_filter as filter,
|
||||
compat_itertools_zip_longest as zip_longest,
|
||||
compat_map as map,
|
||||
compat_numeric_types,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
@ -62,6 +66,10 @@ _NaN = float('nan')
|
||||
_Infinity = float('inf')
|
||||
|
||||
|
||||
class JS_Undefined(object):
    # Sentinel for the JS `undefined` value: the class object itself is
    # the value, tested by identity or membership, eg
    # `v is JS_Undefined`, `JS_Undefined in (a, b)`
    pass
|
||||
|
||||
|
||||
def _js_bit_op(op):
|
||||
|
||||
def zeroise(x):
|
||||
@ -74,43 +82,114 @@ def _js_bit_op(op):
|
||||
return wrapped
|
||||
|
||||
|
||||
def _js_arith_op(op):
|
||||
def _js_arith_op(op, div=False):
|
||||
|
||||
@wraps_op(op)
|
||||
def wrapped(a, b):
|
||||
if JS_Undefined in (a, b):
|
||||
return _NaN
|
||||
return op(a or 0, b or 0)
|
||||
# null, "" --> 0
|
||||
a, b = (float_or_none(
|
||||
(x.strip() if isinstance(x, compat_basestring) else x) or 0,
|
||||
default=_NaN) for x in (a, b))
|
||||
if _NaN in (a, b):
|
||||
return _NaN
|
||||
try:
|
||||
return op(a, b)
|
||||
except ZeroDivisionError:
|
||||
return _NaN if not (div and (a or b)) else _Infinity
|
||||
|
||||
return wrapped
|
||||
|
||||
|
||||
def _js_div(a, b):
|
||||
if JS_Undefined in (a, b) or not (a or b):
|
||||
return _NaN
|
||||
return operator.truediv(a or 0, b) if b else _Infinity
|
||||
_js_arith_add = _js_arith_op(operator.add)
|
||||
|
||||
|
||||
def _js_mod(a, b):
|
||||
if JS_Undefined in (a, b) or not b:
|
||||
return _NaN
|
||||
return (a or 0) % b
|
||||
def _js_add(a, b):
    """Implement the JS binary `+` operator

    If neither operand is a string the result is the arithmetic sum from
    `_js_arith_add()`; otherwise the non-string operand (if any) is
    converted with `_js_toString()` and the two strings are concatenated.
    """
    a_is_str = isinstance(a, compat_basestring)
    b_is_str = isinstance(b, compat_basestring)
    if a_is_str and b_is_str:
        return operator.concat(a, b)
    if a_is_str:
        return operator.concat(a, _js_toString(b))
    if b_is_str:
        return operator.concat(_js_toString(a), b)
    # no string involved: numeric addition
    return _js_arith_add(a, b)
|
||||
|
||||
|
||||
_js_mod = _js_arith_op(operator.mod)
|
||||
__js_exp = _js_arith_op(operator.pow)
|
||||
|
||||
|
||||
def _js_exp(a, b):
|
||||
if not b:
|
||||
return 1 # even 0 ** 0 !!
|
||||
elif JS_Undefined in (a, b):
|
||||
return _NaN
|
||||
return (a or 0) ** b
|
||||
return __js_exp(a, b)
|
||||
|
||||
|
||||
def _js_eq_op(op):
|
||||
def _js_to_primitive(v):
    """Reduce `v` to a number or string for comparison or concatenation

    A list becomes its items, each converted with `_js_toString()`, joined
    by ','; a dict becomes '[object Object]'; numbers and strings are
    returned unchanged; anything else is passed through `compat_str()`.
    """
    if isinstance(v, list):
        return ','.join(map(_js_toString, v))
    if isinstance(v, dict):
        return '[object Object]'
    if isinstance(v, (compat_numeric_types, compat_basestring)):
        # already primitive
        return v
    return compat_str(v)
|
||||
|
||||
|
||||
def _js_toString(v):
    """Return the string that JS would produce when converting `v`

    Handles `undefined`, `null`, booleans and numbers (including NaN and
    the infinities) here and defers everything else to
    `_js_to_primitive()`.  Numbers are formatted with up to 7 decimal
    places, with trailing fractional zeros removed.
    """
    if v is JS_Undefined:
        return 'undefined'
    if v is None:
        return 'null'
    # bool <= int: do this first
    if isinstance(v, bool):
        return ('false', 'true')[v]
    if isinstance(v, compat_numeric_types):
        # NaN is the only number that is not equal to itself; testing by
        # value also catches NaNs that are not the `_NaN` object
        if v is _NaN or v != v:
            return 'NaN'
        if v == _Infinity:
            return 'Infinity'
        if v == -_Infinity:
            return '-Infinity'
        txt = '{0:.7f}'.format(v)
        # strip fractional zeros and then a bare point, in two steps:
        # rstrip('.0') strips a *set* of characters and so would also
        # remove integer zeros ('10.0000000' -> '1', '0.0000000' -> '')
        txt = txt.rstrip('0').rstrip('.')
        # JS prints negative zero as '0'
        return '0' if txt == '-0' else txt
    return _js_to_primitive(v)
|
||||
|
||||
|
||||
_nullish = frozenset((None, JS_Undefined))
|
||||
|
||||
|
||||
def _js_eq(a, b):
    """Implement the JS loose equality operator `==`

    @returns True or False according to these rules, tried in order:
             * NaN is equal to nothing
             * two lists, or two dicts, are equal only if the same object
             * values that compare equal in Python are equal
             * `null` and `undefined` are equal to each other only
             * otherwise both sides are reduced with `_js_to_primitive()`;
               two strings are compared as strings, a string and a
               non-string by converting the string to a number
    """
    # NaN != any
    if _NaN in (a, b):
        return False
    # Object is Object
    if isinstance(a, type(b)) and isinstance(b, (dict, list)):
        return operator.is_(a, b)
    # general case
    if a == b:
        return True
    # null == undefined
    # tested by identity rather than by building a set of the operands,
    # which raises TypeError if either is unhashable (list, dict)
    a_nullish, b_nullish = (
        any(x is n for n in _nullish) for x in (a, b))
    if a_nullish or b_nullish:
        return a_nullish and b_nullish
    a, b = _js_to_primitive(a), _js_to_primitive(b)
    if isinstance(a, compat_basestring) and isinstance(b, compat_basestring):
        # both sides are now strings, eg [1] == '1': compare them as such
        return a == b
    if not isinstance(a, compat_basestring):
        a, b = b, a
    # Number to String: convert the string to a number
    # Conversion failure results in ... false
    if isinstance(a, compat_basestring):
        return float_or_none(a) == b
    return a == b
|
||||
|
||||
|
||||
def _js_neq(a, b):
    """Implement the JS `!=` operator as the negation of `_js_eq()`"""
    return not _js_eq(a, b)
|
||||
|
||||
|
||||
def _js_id_op(op):
    """Make a JS strict (in)equality operator from a Python identity test

    @param op   `operator.is_` (for `===`) or `operator.is_not` (for `!==`)
    @returns    a function of two operands giving the result of the JS
                operator; the operands are never converted
    """

    @wraps_op(op)
    def wrapped(a, b):
        # `op(_NaN, None)` is the "not identical" result for either op:
        # False for is_, True for is_not
        if _NaN in (a, b):
            return op(_NaN, None)
        # a boolean is never identical to a number, though in Python
        # True == 1 and False == 0
        if isinstance(a, bool) != isinstance(b, bool):
            return op(_NaN, None)
        if not isinstance(a, (compat_basestring, compat_numeric_types)):
            a, b = b, a
        # strings are === if ==
        # why 'a' is not 'a': https://stackoverflow.com/a/1504848
        if isinstance(a, (compat_basestring, compat_numeric_types)):
            return a == b if op(0, 0) else a != b
        # everything else, including null and undefined, by identity:
        # so null === null but not null === undefined, and list or dict
        # operands (unhashable) need no special treatment
        return op(a, b)

    return wrapped
|
||||
@ -138,25 +217,57 @@ def _js_ternary(cndn, if_true=True, if_false=False):
|
||||
return if_true
|
||||
|
||||
|
||||
def _js_unary_op(op):
    """Adapt the one-argument function `op` to the two-argument operator signature

    The returned wrapper ignores its first argument and returns `op()`
    applied to the second.
    """

    @wraps_op(op)
    def wrapped(_, a):
        return op(a)

    return wrapped
|
||||
|
||||
|
||||
# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/typeof
|
||||
def _js_typeof(expr):
    """Return the result of the JS `typeof` operator for `expr`

    @returns one of 'undefined', 'boolean', 'number', 'string',
             'function', 'object' (also returned for `null`)
    """
    # booleans must be recognised by type, and first: 0 and 1 hash and
    # compare equal to False and True, so looking the value up in a dict
    # keyed by True/False reports those numbers as 'boolean'
    if isinstance(expr, bool):
        return 'boolean'
    # before callable(): JS_Undefined is a class and so is callable
    if expr is JS_Undefined:
        return 'undefined'
    if expr is None:
        return 'object'
    # covers NaN and the infinities, which are floats
    if isinstance(expr, float):
        return 'number'
    for t, n in (
        (compat_basestring, 'string'),
        (compat_numeric_types, 'number'),
    ):
        if isinstance(expr, t):
            return n
    if callable(expr):
        return 'function'
    # TODO: Symbol, BigInt
    return 'object'
|
||||
|
||||
|
||||
# (op, definition) in order of binding priority, tightest first
|
||||
# avoid dict to maintain order
|
||||
# definition None => Defined in JSInterpreter._operator
|
||||
_OPERATORS = (
|
||||
('>>', _js_bit_op(operator.rshift)),
|
||||
('<<', _js_bit_op(operator.lshift)),
|
||||
('+', _js_arith_op(operator.add)),
|
||||
('+', _js_add),
|
||||
('-', _js_arith_op(operator.sub)),
|
||||
('*', _js_arith_op(operator.mul)),
|
||||
('%', _js_mod),
|
||||
('/', _js_div),
|
||||
('/', _js_arith_op(operator.truediv, div=True)),
|
||||
('**', _js_exp),
|
||||
)
|
||||
|
||||
_COMP_OPERATORS = (
|
||||
('===', operator.is_),
|
||||
('!==', operator.is_not),
|
||||
('==', _js_eq_op(operator.eq)),
|
||||
('!=', _js_eq_op(operator.ne)),
|
||||
('===', _js_id_op(operator.is_)),
|
||||
('!==', _js_id_op(operator.is_not)),
|
||||
('==', _js_eq),
|
||||
('!=', _js_neq),
|
||||
('<=', _js_comp_op(operator.le)),
|
||||
('>=', _js_comp_op(operator.ge)),
|
||||
('<', _js_comp_op(operator.lt)),
|
||||
@ -176,6 +287,11 @@ _SC_OPERATORS = (
|
||||
('&&', None),
|
||||
)
|
||||
|
||||
_UNARY_OPERATORS_X = (
|
||||
('void', _js_unary_op(lambda _: JS_Undefined)),
|
||||
('typeof', _js_unary_op(_js_typeof)),
|
||||
)
|
||||
|
||||
_OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS))
|
||||
|
||||
_NAME_RE = r'[a-zA-Z_$][\w$]*'
|
||||
@ -183,10 +299,6 @@ _MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
|
||||
_QUOTES = '\'"/'
|
||||
|
||||
|
||||
class JS_Undefined(object):
|
||||
pass
|
||||
|
||||
|
||||
class JS_Break(ExtractorError):
|
||||
def __init__(self):
|
||||
ExtractorError.__init__(self, 'Invalid break')
|
||||
@ -242,6 +354,7 @@ class Debugger(object):
|
||||
|
||||
@classmethod
|
||||
def wrap_interpreter(cls, f):
|
||||
@wraps(f)
|
||||
def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
|
||||
if cls.ENABLED and stmt.strip():
|
||||
cls.write(stmt, level=allow_recursion)
|
||||
@ -255,7 +368,7 @@ class Debugger(object):
|
||||
raise
|
||||
if cls.ENABLED and stmt.strip():
|
||||
if should_ret or repr(ret) != stmt:
|
||||
cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
|
||||
cls.write(['->', '=>'][bool(should_ret)], repr(ret), '<-|', stmt, level=allow_recursion)
|
||||
return ret, should_ret
|
||||
return interpret_statement
|
||||
|
||||
@ -284,6 +397,9 @@ class JSInterpreter(object):
|
||||
RE_FLAGS = {
|
||||
# special knowledge: Python's re flags are bitmask values, current max 128
|
||||
# invent new bitmask values well above that for literal parsing
|
||||
# JS 'u' flag is effectively always set (surrogate pairs aren't seen),
|
||||
# but \u{...} and \p{...} escapes aren't handled; no additional JS 'v'
|
||||
# features are supported
|
||||
# TODO: execute matches with these flags (remaining: d, y)
|
||||
'd': 1024, # Generate indices for substring matches
|
||||
'g': 2048, # Global search
|
||||
@ -291,6 +407,7 @@ class JSInterpreter(object):
|
||||
'm': re.M, # Multi-line search
|
||||
's': re.S, # Allows . to match newline characters
|
||||
'u': re.U, # Treat a pattern as a sequence of unicode code points
|
||||
'v': re.U, # Like 'u' with extended character class and \p{} syntax
|
||||
'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string
|
||||
}
|
||||
|
||||
@ -347,6 +464,8 @@ class JSInterpreter(object):
|
||||
def __op_chars(cls):
|
||||
op_chars = set(';,[')
|
||||
for op in cls._all_operators():
|
||||
if op[0].isalpha():
|
||||
continue
|
||||
op_chars.update(op[0])
|
||||
return op_chars
|
||||
|
||||
@ -369,9 +488,18 @@ class JSInterpreter(object):
|
||||
skipping = 0
|
||||
if skip_delims:
|
||||
skip_delims = variadic(skip_delims)
|
||||
skip_txt = None
|
||||
for idx, char in enumerate(expr):
|
||||
if skip_txt and idx <= skip_txt[1]:
|
||||
continue
|
||||
paren_delta = 0
|
||||
if not in_quote:
|
||||
if char == '/' and expr[idx:idx + 2] == '/*':
|
||||
# skip a comment
|
||||
skip_txt = expr[idx:].find('*/', 2)
|
||||
skip_txt = [idx, idx + skip_txt + 1] if skip_txt >= 2 else None
|
||||
if skip_txt:
|
||||
continue
|
||||
if char in _MATCHING_PARENS:
|
||||
counters[_MATCHING_PARENS[char]] += 1
|
||||
paren_delta = 1
|
||||
@ -404,11 +532,18 @@ class JSInterpreter(object):
|
||||
if pos < delim_len:
|
||||
pos += 1
|
||||
continue
|
||||
if skip_txt and skip_txt[0] >= start and skip_txt[1] <= idx - delim_len:
|
||||
yield expr[start:skip_txt[0]] + expr[skip_txt[1] + 1: idx - delim_len]
|
||||
else:
|
||||
yield expr[start: idx - delim_len]
|
||||
skip_txt = None
|
||||
start, pos = idx + 1, 0
|
||||
splits += 1
|
||||
if max_split and splits >= max_split:
|
||||
break
|
||||
if skip_txt and skip_txt[0] >= start:
|
||||
yield expr[start:skip_txt[0]] + expr[skip_txt[1] + 1:]
|
||||
else:
|
||||
yield expr[start:]
|
||||
|
||||
@classmethod
|
||||
@ -425,7 +560,7 @@ class JSInterpreter(object):
|
||||
if not _cached:
|
||||
_cached.extend(itertools.chain(
|
||||
# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
|
||||
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS))
|
||||
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS, _UNARY_OPERATORS_X))
|
||||
return _cached
|
||||
|
||||
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
|
||||
@ -449,13 +584,14 @@ class JSInterpreter(object):
|
||||
except Exception as e:
|
||||
raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)
|
||||
|
||||
def _index(self, obj, idx, allow_undefined=False):
|
||||
if idx == 'length':
|
||||
def _index(self, obj, idx, allow_undefined=True):
|
||||
if idx == 'length' and isinstance(obj, list):
|
||||
return len(obj)
|
||||
try:
|
||||
return obj[int(idx)] if isinstance(obj, list) else obj[idx]
|
||||
except Exception as e:
|
||||
return obj[int(idx)] if isinstance(obj, list) else obj[compat_str(idx)]
|
||||
except (TypeError, KeyError, IndexError) as e:
|
||||
if allow_undefined:
|
||||
# when is not allowed?
|
||||
return JS_Undefined
|
||||
raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
|
||||
|
||||
@ -467,7 +603,7 @@ class JSInterpreter(object):
|
||||
|
||||
# used below
|
||||
_VAR_RET_THROW_RE = re.compile(r'''(?x)
|
||||
(?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["'])|$)|(?P<throw>throw\s+)
|
||||
(?:(?P<var>var|const|let)\s+|(?P<ret>return)(?:\s+|(?=["'])|$)|(?P<throw>throw)\s+)
|
||||
''')
|
||||
_COMPOUND_RE = re.compile(r'''(?x)
|
||||
(?P<try>try)\s*\{|
|
||||
@ -479,6 +615,52 @@ class JSInterpreter(object):
|
||||
_FINALLY_RE = re.compile(r'finally\s*\{')
|
||||
_SWITCH_RE = re.compile(r'switch\s*\(')
|
||||
|
||||
def handle_operators(self, expr, local_vars, allow_recursion):
    """Evaluate `expr` by splitting it at an operator, if it contains one

    The operators from `_all_operators()` are tried in turn; `expr` is
    split with `_separate()` at the first one that divides it, the text
    left of the last occurrence is evaluated with
    `interpret_expression()`, and `_operator()` applies the operator to
    that value and the remaining text.

    @param expr             the expression text
    @param local_vars       passed on to the evaluation of the operands
    @param allow_recursion  passed on to the evaluation of the operands
    @returns                the tuple (value, True), or None (implicitly)
                            if no operator splits `expr`
    """

    for op, _ in self._all_operators():
        # hackety: </> have higher priority than <</>>, but don't confuse them
        skip_delim = (op + op) if op in '<>*?' else None
        if op == '?':
            skip_delim = (skip_delim, '?.')
        separated = list(self._separate(expr, op, skip_delims=skip_delim))
        if len(separated) < 2:
            # op does not occur (outside brackets and quotes) in expr
            continue

        # the operator is applied at its last occurrence
        right_expr = separated.pop()
        # handle operators that are both unary and binary, minimal BODMAS
        if op in ('+', '-'):
            # simplify/adjust consecutive instances of these operators
            undone = 0
            separated = [s.strip() for s in separated]
            # empty pieces come from adjacent occurrences of op
            while len(separated) > 1 and not separated[-1]:
                undone += 1
                separated.pop()
            # an odd number of dropped '-' leaves one sign on the right operand
            if op == '-' and undone % 2 != 0:
                right_expr = op + right_expr
            elif op == '+':
                # pieces consisting only of operator characters go with the right operand
                while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS:
                    right_expr = separated.pop() + right_expr
                if separated[-1][-1:] in self.OP_CHARS:
                    right_expr = separated.pop() + right_expr
            # hanging op at end of left => unary + (strip) or - (push right)
            left_val = separated[-1] if separated else ''
            for dm_op in ('*', '%', '/', '**'):
                bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
                if len(bodmas) > 1 and not bodmas[-1].strip():
                    # left text ends with dm_op, so this op is the sign of
                    # dm_op's right operand: put the pieces back together
                    expr = op.join(separated) + op + right_expr
                    if len(separated) > 1:
                        separated.pop()
                        right_expr = op.join((left_val, right_expr))
                    else:
                        separated = [op.join((left_val, right_expr))]
                        right_expr = None
                    break
            if right_expr is None:
                # nothing left to apply this op to: try the next operator
                continue

        left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
        return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), True
|
||||
|
||||
@Debugger.wrap_interpreter
|
||||
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
||||
if allow_recursion < 0:
|
||||
@ -501,7 +683,7 @@ class JSInterpreter(object):
|
||||
expr = stmt[len(m.group(0)):].strip()
|
||||
if m.group('throw'):
|
||||
raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion))
|
||||
should_return = not m.group('var')
|
||||
should_return = 'return' if m.group('ret') else False
|
||||
if not expr:
|
||||
return None, should_return
|
||||
|
||||
@ -533,9 +715,15 @@ class JSInterpreter(object):
|
||||
else:
|
||||
raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr)
|
||||
|
||||
if expr.startswith('void '):
|
||||
left = self.interpret_expression(expr[5:], local_vars, allow_recursion)
|
||||
return None, should_return
|
||||
for op, _ in _UNARY_OPERATORS_X:
|
||||
if not expr.startswith(op):
|
||||
continue
|
||||
operand = expr[len(op):]
|
||||
if not operand or operand[0] != ' ':
|
||||
continue
|
||||
op_result = self.handle_operators(expr, local_vars, allow_recursion)
|
||||
if op_result:
|
||||
return op_result[0], should_return
|
||||
|
||||
if expr.startswith('{'):
|
||||
inner, outer = self._separate_at_paren(expr)
|
||||
@ -582,7 +770,7 @@ class JSInterpreter(object):
|
||||
if_expr, expr = self._separate_at_paren(expr)
|
||||
else:
|
||||
# may lose ... else ... because of ll.368-374
|
||||
if_expr, expr = self._separate_at_paren(expr, delim=';')
|
||||
if_expr, expr = self._separate_at_paren(' %s;' % (expr,), delim=';')
|
||||
else_expr = None
|
||||
m = re.match(r'else\s*(?P<block>\{)?', expr)
|
||||
if m:
|
||||
@ -720,7 +908,7 @@ class JSInterpreter(object):
|
||||
start, end = m.span()
|
||||
sign = m.group('pre_sign') or m.group('post_sign')
|
||||
ret = local_vars[var]
|
||||
local_vars[var] += 1 if sign[0] == '+' else -1
|
||||
local_vars[var] = _js_add(ret, 1 if sign[0] == '+' else -1)
|
||||
if m.group('pre_sign'):
|
||||
ret = local_vars[var]
|
||||
expr = expr[:start] + self._dump(ret, local_vars) + expr[end:]
|
||||
@ -730,13 +918,13 @@ class JSInterpreter(object):
|
||||
|
||||
m = re.match(r'''(?x)
|
||||
(?P<assign>
|
||||
(?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?\s*
|
||||
(?P<out>{_NAME_RE})(?:\[(?P<out_idx>(?:.+?\]\s*\[)*.+?)\])?\s*
|
||||
(?P<op>{_OPERATOR_RE})?
|
||||
=(?!=)(?P<expr>.*)$
|
||||
)|(?P<return>
|
||||
(?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
|
||||
)|(?P<indexing>
|
||||
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
|
||||
(?P<in>{_NAME_RE})\[(?P<in_idx>(?:.+?\]\s*\[)*.+?)\]$
|
||||
)|(?P<attribute>
|
||||
(?P<var>{_NAME_RE})(?:(?P<nullish>\?)?\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
|
||||
)|(?P<function>
|
||||
@ -746,19 +934,23 @@ class JSInterpreter(object):
|
||||
if md.get('assign'):
|
||||
left_val = local_vars.get(m.group('out'))
|
||||
|
||||
if not m.group('index'):
|
||||
if not m.group('out_idx'):
|
||||
local_vars[m.group('out')] = self._operator(
|
||||
m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
|
||||
return local_vars[m.group('out')], should_return
|
||||
elif left_val in (None, JS_Undefined):
|
||||
raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr)
|
||||
|
||||
idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
|
||||
if not isinstance(idx, (int, float)):
|
||||
raise self.Exception('List index %s must be integer' % (idx, ), expr=expr)
|
||||
indexes = re.split(r'\]\s*\[', m.group('out_idx'))
|
||||
for i, idx in enumerate(indexes, 1):
|
||||
idx = self.interpret_expression(idx, local_vars, allow_recursion)
|
||||
if i < len(indexes):
|
||||
left_val = self._index(left_val, idx)
|
||||
if isinstance(idx, float):
|
||||
idx = int(idx)
|
||||
left_val[idx] = self._operator(
|
||||
m.group('op'), self._index(left_val, idx), m.group('expr'), expr, local_vars, allow_recursion)
|
||||
m.group('op'), self._index(left_val, idx) if m.group('op') else None,
|
||||
m.group('expr'), expr, local_vars, allow_recursion)
|
||||
return left_val[idx], should_return
|
||||
|
||||
elif expr.isdigit():
|
||||
@ -776,63 +968,31 @@ class JSInterpreter(object):
|
||||
return _Infinity, should_return
|
||||
|
||||
elif md.get('return'):
|
||||
return local_vars[m.group('name')], should_return
|
||||
ret = local_vars[m.group('name')]
|
||||
# challenge may try to force returning the original value
|
||||
# use an optional internal var to block this
|
||||
if should_return == 'return':
|
||||
if '_ytdl_do_not_return' not in local_vars:
|
||||
return ret, True
|
||||
return (ret, True) if ret != local_vars['_ytdl_do_not_return'] else (ret, False)
|
||||
else:
|
||||
return ret, should_return
|
||||
|
||||
try:
|
||||
with compat_contextlib_suppress(ValueError):
|
||||
ret = json.loads(js_to_json(expr)) # strict=True)
|
||||
if not md.get('attribute'):
|
||||
return ret, should_return
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if md.get('indexing'):
|
||||
val = local_vars[m.group('in')]
|
||||
idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion)
|
||||
return self._index(val, idx), should_return
|
||||
for idx in re.split(r'\]\s*\[', m.group('in_idx')):
|
||||
idx = self.interpret_expression(idx, local_vars, allow_recursion)
|
||||
val = self._index(val, idx)
|
||||
return val, should_return
|
||||
|
||||
for op, _ in self._all_operators():
|
||||
# hackety: </> have higher priority than <</>>, but don't confuse them
|
||||
skip_delim = (op + op) if op in '<>*?' else None
|
||||
if op == '?':
|
||||
skip_delim = (skip_delim, '?.')
|
||||
separated = list(self._separate(expr, op, skip_delims=skip_delim))
|
||||
if len(separated) < 2:
|
||||
continue
|
||||
|
||||
right_expr = separated.pop()
|
||||
# handle operators that are both unary and binary, minimal BODMAS
|
||||
if op in ('+', '-'):
|
||||
# simplify/adjust consecutive instances of these operators
|
||||
undone = 0
|
||||
separated = [s.strip() for s in separated]
|
||||
while len(separated) > 1 and not separated[-1]:
|
||||
undone += 1
|
||||
separated.pop()
|
||||
if op == '-' and undone % 2 != 0:
|
||||
right_expr = op + right_expr
|
||||
elif op == '+':
|
||||
while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS:
|
||||
right_expr = separated.pop() + right_expr
|
||||
if separated[-1][-1:] in self.OP_CHARS:
|
||||
right_expr = separated.pop() + right_expr
|
||||
# hanging op at end of left => unary + (strip) or - (push right)
|
||||
left_val = separated[-1] if separated else ''
|
||||
for dm_op in ('*', '%', '/', '**'):
|
||||
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
|
||||
if len(bodmas) > 1 and not bodmas[-1].strip():
|
||||
expr = op.join(separated) + op + right_expr
|
||||
if len(separated) > 1:
|
||||
separated.pop()
|
||||
right_expr = op.join((left_val, right_expr))
|
||||
else:
|
||||
separated = [op.join((left_val, right_expr))]
|
||||
right_expr = None
|
||||
break
|
||||
if right_expr is None:
|
||||
continue
|
||||
|
||||
left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
|
||||
return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return
|
||||
op_result = self.handle_operators(expr, local_vars, allow_recursion)
|
||||
if op_result:
|
||||
return op_result[0], should_return
|
||||
|
||||
if md.get('attribute'):
|
||||
variable, member, nullish = m.group('var', 'member', 'nullish')
|
||||
@ -877,7 +1037,7 @@ class JSInterpreter(object):
|
||||
|
||||
# Member access
|
||||
if arg_str is None:
|
||||
return self._index(obj, member, nullish)
|
||||
return self._index(obj, member)
|
||||
|
||||
# Function call
|
||||
argvals = [
|
||||
@ -904,7 +1064,7 @@ class JSInterpreter(object):
|
||||
if obj is compat_str:
|
||||
if member == 'fromCharCode':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
return ''.join(map(compat_chr, argvals))
|
||||
return ''.join(compat_chr(int(n)) for n in argvals)
|
||||
raise self.Exception('Unsupported string method ' + member, expr=expr)
|
||||
elif obj is float:
|
||||
if member == 'pow':
|
||||
@ -913,13 +1073,47 @@ class JSInterpreter(object):
|
||||
raise self.Exception('Unsupported Math method ' + member, expr=expr)
|
||||
|
||||
if member == 'split':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
assertion(len(argvals) == 1, 'with limit argument is not implemented')
|
||||
return obj.split(argvals[0]) if argvals[0] else list(obj)
|
||||
assertion(len(argvals) <= 2, 'takes at most two arguments')
|
||||
if len(argvals) > 1:
|
||||
limit = argvals[1]
|
||||
assertion(isinstance(limit, int) and limit >= 0, 'integer limit >= 0')
|
||||
if limit == 0:
|
||||
return []
|
||||
else:
|
||||
limit = 0
|
||||
if len(argvals) == 0:
|
||||
argvals = [JS_Undefined]
|
||||
elif isinstance(argvals[0], self.JS_RegExp):
|
||||
# avoid re.split(), similar but not enough
|
||||
|
||||
def where():
|
||||
for m in argvals[0].finditer(obj):
|
||||
yield m.span(0)
|
||||
yield (None, None)
|
||||
|
||||
def splits(limit=limit):
|
||||
i = 0
|
||||
for j, jj in where():
|
||||
if j == jj == 0:
|
||||
continue
|
||||
if j is None and i >= len(obj):
|
||||
break
|
||||
yield obj[i:j]
|
||||
if jj is None or limit == 1:
|
||||
break
|
||||
limit -= 1
|
||||
i = jj
|
||||
|
||||
return list(splits())
|
||||
return (
|
||||
obj.split(argvals[0], limit - 1) if argvals[0] and argvals[0] != JS_Undefined
|
||||
else list(obj)[:limit or None])
|
||||
elif member == 'join':
|
||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||
assertion(len(argvals) == 1, 'takes exactly one argument')
|
||||
return argvals[0].join(obj)
|
||||
assertion(len(argvals) <= 1, 'takes at most one argument')
|
||||
return (',' if len(argvals) == 0 else argvals[0]).join(
|
||||
('' if x in (None, JS_Undefined) else _js_toString(x))
|
||||
for x in obj)
|
||||
elif member == 'reverse':
|
||||
assertion(not argvals, 'does not take any arguments')
|
||||
obj.reverse()
|
||||
@ -941,37 +1135,31 @@ class JSInterpreter(object):
|
||||
index, how_many = map(int, (argvals + [len(obj)])[:2])
|
||||
if index < 0:
|
||||
index += len(obj)
|
||||
add_items = argvals[2:]
|
||||
res = []
|
||||
for _ in range(index, min(index + how_many, len(obj))):
|
||||
res.append(obj.pop(index))
|
||||
for i, item in enumerate(add_items):
|
||||
obj.insert(index + i, item)
|
||||
res = [obj.pop(index)
|
||||
for _ in range(index, min(index + how_many, len(obj)))]
|
||||
obj[index:index] = argvals[2:]
|
||||
return res
|
||||
elif member == 'unshift':
|
||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
for item in reversed(argvals):
|
||||
obj.insert(0, item)
|
||||
return obj
|
||||
elif member == 'pop':
|
||||
elif member in ('shift', 'pop'):
|
||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||
assertion(not argvals, 'does not take any arguments')
|
||||
if not obj:
|
||||
return
|
||||
return obj.pop()
|
||||
return obj.pop(0 if member == 'shift' else -1) if len(obj) > 0 else JS_Undefined
|
||||
elif member == 'unshift':
|
||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||
# not enforced: assertion(argvals, 'takes one or more arguments')
|
||||
obj[0:0] = argvals
|
||||
return len(obj)
|
||||
elif member == 'push':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
# not enforced: assertion(argvals, 'takes one or more arguments')
|
||||
obj.extend(argvals)
|
||||
return obj
|
||||
return len(obj)
|
||||
elif member == 'forEach':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
|
||||
assertion(len(argvals) <= 2, 'takes at most 2 arguments')
|
||||
f, this = (argvals + [''])[:2]
|
||||
return [f((item, idx, obj), {'this': this}, allow_recursion) for idx, item in enumerate(obj)]
|
||||
elif member == 'indexOf':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
|
||||
assertion(len(argvals) <= 2, 'takes at most 2 arguments')
|
||||
idx, start = (argvals + [0])[:2]
|
||||
try:
|
||||
return obj.index(idx, start)
|
||||
@ -980,7 +1168,7 @@ class JSInterpreter(object):
|
||||
elif member == 'charCodeAt':
|
||||
assertion(isinstance(obj, compat_str), 'must be applied on a string')
|
||||
# assertion(len(argvals) == 1, 'takes exactly one argument') # but not enforced
|
||||
idx = argvals[0] if isinstance(argvals[0], int) else 0
|
||||
idx = argvals[0] if len(argvals) > 0 and isinstance(argvals[0], int) else 0
|
||||
if idx >= len(obj):
|
||||
return None
|
||||
return ord(obj[idx])
|
||||
@ -1031,7 +1219,7 @@ class JSInterpreter(object):
|
||||
yield self.interpret_expression(v, local_vars, allow_recursion)
|
||||
|
||||
def extract_object(self, objname):
|
||||
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
|
||||
_FUNC_NAME_RE = r'''(?:{n}|"{n}"|'{n}')'''.format(n=_NAME_RE)
|
||||
obj = {}
|
||||
fields = next(filter(None, (
|
||||
obj_m.group('fields') for obj_m in re.finditer(
|
||||
@ -1090,6 +1278,7 @@ class JSInterpreter(object):
|
||||
|
||||
def extract_function_from_code(self, argnames, code, *global_stack):
|
||||
local_vars = {}
|
||||
|
||||
while True:
|
||||
mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code)
|
||||
if mobj is None:
|
||||
@ -1100,10 +1289,11 @@ class JSInterpreter(object):
|
||||
[x.strip() for x in mobj.group('args').split(',')],
|
||||
body, local_vars, *global_stack))
|
||||
code = code[:start] + name + remaining
|
||||
|
||||
return self.build_function(argnames, code, local_vars, *global_stack)
|
||||
|
||||
def call_function(self, funcname, *args):
|
||||
return self.extract_function(funcname)(args)
|
||||
def call_function(self, funcname, *args, **kw_global_vars):
|
||||
return self.extract_function(funcname)(args, kw_global_vars)
|
||||
|
||||
@classmethod
|
||||
def build_arglist(cls, arg_text):
|
||||
@ -1122,8 +1312,9 @@ class JSInterpreter(object):
|
||||
global_stack = list(global_stack) or [{}]
|
||||
argnames = tuple(argnames)
|
||||
|
||||
def resf(args, kwargs={}, allow_recursion=100):
|
||||
global_stack[0].update(zip_longest(argnames, args, fillvalue=None))
|
||||
def resf(args, kwargs=None, allow_recursion=100):
|
||||
kwargs = kwargs or {}
|
||||
global_stack[0].update(zip_longest(argnames, args, fillvalue=JS_Undefined))
|
||||
global_stack[0].update(kwargs)
|
||||
var_stack = LocalNameSpace(*global_stack)
|
||||
ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
|
||||
|
Loading…
Reference in New Issue
Block a user