mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2026-06-11 07:00:15 +00:00
Compare commits
135 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| c21b1fbeeb | |||
| f920ce295e | |||
| 7a7bd19c45 | |||
| 8f4b58d70e | |||
| 3fd45e03bf | |||
| 869b4aeff4 | |||
| cc9ca3ba6e | |||
| ea71034bd3 | |||
| 9fffd0469f | |||
| ae7773942e | |||
| 469a64cebf | |||
| aae3fdcfae | |||
| 6a66904f8e | |||
| 78271e3319 | |||
| 92bf0bcdf8 | |||
| 1283204917 | |||
| 6789defea9 | |||
| acf2a6e97b | |||
| 8cfb6efe6f | |||
| 04edb9caf5 | |||
| 044131ba21 | |||
| 0a7055c90d | |||
| 9e3f19919a | |||
| 4a3da4ebdb | |||
| 027008b14e | |||
| c6df692466 | |||
| acf757f42e | |||
| dd8982f19c | |||
| 654bd52f58 | |||
| a9551e9020 | |||
| 4e980275b5 | |||
| c172440ac5 | |||
| e332772531 | |||
| 437cac8cc1 | |||
| 9f281cacd2 | |||
| 748a0fab8a | |||
| c1f06d6307 | |||
| c4e817ce4a | |||
| 9a3e5e6955 | |||
| 228d30ed06 | |||
| 057c0609fc | |||
| 17d2712d9c | |||
| fc09240e24 | |||
| 146303136f | |||
| 96aded8d3d | |||
| 2886be15aa | |||
| ca0f500ecf | |||
| 29aef5a33c | |||
| 9158b2b301 | |||
| 0196149c5b | |||
| 8f9312c387 | |||
| 439b9a9e9b | |||
| 8c72beb25e | |||
| 1ee94db2d0 | |||
| e77d2975af | |||
| e41b1f7385 | |||
| cd596028d6 | |||
| cc57bd33a8 | |||
| 6d593c3276 | |||
| 91755ee384 | |||
| 0692ef86ef | |||
| 439d9be27d | |||
| b80505a409 | |||
| e4c17d7274 | |||
| 2c58674e0e | |||
| ef1269fb07 | |||
| e525d9a3df | |||
| 20b4492c71 | |||
| dee3f73787 | |||
| d543bdc351 | |||
| c7ff0c6422 | |||
| 01c46659c4 | |||
| b04b885271 | |||
| dc35bfd2d5 | |||
| 70fca8d694 | |||
| a52c633536 | |||
| 7b6c60393e | |||
| 83e7a314b4 | |||
| 749f2ca044 | |||
| 5468ff4d91 | |||
| 1d2daaea63 | |||
| 52585fd6dc | |||
| c03844a4ec | |||
| 6449cd807e | |||
| e2a08185c6 | |||
| 5d6677ca28 | |||
| 5a8a29cfea | |||
| c1708b89c0 | |||
| 83fddfd493 | |||
| 1798791df1 | |||
| 6ebb0dca9f | |||
| cf8d6ec865 | |||
| f452f72c6b | |||
| 3198291f26 | |||
| 02c1d5e285 | |||
| ec4161a57d | |||
| 03d8d4df38 | |||
| 03d2d6d51b | |||
| 83fda3c000 | |||
| 4fe8495a23 | |||
| a16f6643f0 | |||
| adc0ae3ceb | |||
| 7bb3ceb4c7 | |||
| 75a4fc5b72 | |||
| 87673cd438 | |||
| f345fe9db7 | |||
| e683a48d0e | |||
| a7a14d9586 | |||
| 219337990b | |||
| 376a770cc4 | |||
| 7e500dbd93 | |||
| affd04a45d | |||
| c84130e865 | |||
| 4f264c02c7 | |||
| d205476103 | |||
| 367cc95aa7 | |||
| 206dba27a4 | |||
| dcf53d4408 | |||
| 63be3b8989 | |||
| 18b4e9e79d | |||
| cb454b333d | |||
| e0d9f85aee | |||
| b04fbd789c | |||
| aad9556414 | |||
| 48a1e5141a | |||
| 0865f397ae | |||
| 796df3c631 | |||
| a28383834b | |||
| 3a0d2f520a | |||
| 6348ad12a0 | |||
| fe7710cbcc | |||
| 2103d038b3 | |||
| 6ca85be6f8 | |||
| 9f0df77ab1 | |||
| e72c7e4123 |
@@ -106,3 +106,5 @@ Johan K. Jensen
|
||||
Yen Chi Hsuan
|
||||
Enam Mijbah Noor
|
||||
David Luhmer
|
||||
Shaya Goldberg
|
||||
Paul Hartmann
|
||||
|
||||
@@ -292,9 +292,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
video results by putting a condition in
|
||||
brackets, as in -f "best[height=720]" (or
|
||||
-f "[filesize>10M]"). This works for
|
||||
filesize, height, width, tbr, abr, and vbr
|
||||
and the comparisons <, <=, >, >=, =, != .
|
||||
Formats for which the value is not known
|
||||
filesize, height, width, tbr, abr, vbr, and
|
||||
fps and the comparisons <, <=, >, >=, =, !=
|
||||
. Formats for which the value is not known
|
||||
are excluded unless you put a question mark
|
||||
(?) after the operator. You can combine
|
||||
format filters, so -f "[height <=?
|
||||
@@ -368,11 +368,11 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--add-metadata write metadata to the video file
|
||||
--xattrs write metadata to the video file's xattrs
|
||||
(using dublin core and xdg standards)
|
||||
--fixup POLICY (experimental) Automatically correct known
|
||||
faults of the file. One of never (do
|
||||
nothing), warn (only emit a warning),
|
||||
detect_or_warn(check whether we can do
|
||||
anything about it, warn otherwise
|
||||
--fixup POLICY Automatically correct known faults of the
|
||||
file. One of never (do nothing), warn (only
|
||||
emit a warning), detect_or_warn(the
|
||||
default; fix file if we can, warn
|
||||
otherwise)
|
||||
--prefer-avconv Prefer avconv over ffmpeg for running the
|
||||
postprocessors (default)
|
||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
||||
@@ -525,9 +525,16 @@ From then on, after restarting your shell, you will be able to access both youtu
|
||||
|
||||
Use the `-o` to specify an [output template](#output-template), for example `-o "/home/user/videos/%(title)s-%(id)s.%(ext)s"`. If you want this for all of your downloads, put the option into your [configuration file](#configuration).
|
||||
|
||||
### How do I download a video starting with a `-` ?
|
||||
|
||||
Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
|
||||
|
||||
youtube-dl -- -wNyEUrxzFU
|
||||
youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
|
||||
|
||||
### How can I detect whether a given URL is supported by youtube-dl?
|
||||
|
||||
For one, have a look at the [list of supported sites](docs/supportedsites). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
|
||||
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
|
||||
|
||||
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
|
||||
|
||||
|
||||
+9
-3
@@ -148,9 +148,15 @@ def expect_info_dict(self, got_dict, expected_dict):
|
||||
return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
|
||||
else:
|
||||
return repr(v)
|
||||
info_dict_str = ''.join(
|
||||
' %s: %s,\n' % (_repr(k), _repr(v))
|
||||
for k, v in test_info_dict.items())
|
||||
info_dict_str = ''
|
||||
if len(missing_keys) != len(expected_dict):
|
||||
info_dict_str += ''.join(
|
||||
' %s: %s,\n' % (_repr(k), _repr(v))
|
||||
for k, v in test_info_dict.items() if k not in missing_keys)
|
||||
info_dict_str += '\n'
|
||||
info_dict_str += ''.join(
|
||||
' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k]))
|
||||
for k in missing_keys)
|
||||
write_string(
|
||||
'\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
|
||||
self.assertFalse(
|
||||
|
||||
@@ -89,7 +89,7 @@ def generator(test_case):
|
||||
|
||||
for tc in test_cases:
|
||||
info_dict = tc.get('info_dict', {})
|
||||
if not tc.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
|
||||
if not (info_dict.get('id') and info_dict.get('ext')):
|
||||
raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
|
||||
|
||||
if 'skip' in test_case:
|
||||
@@ -116,7 +116,7 @@ def generator(test_case):
|
||||
expect_warnings(ydl, test_case.get('expected_warnings', []))
|
||||
|
||||
def get_tc_filename(tc):
|
||||
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
|
||||
return ydl.prepare_filename(tc.get('info_dict', {}))
|
||||
|
||||
res_dict = None
|
||||
|
||||
|
||||
@@ -0,0 +1,72 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_http_server
|
||||
import ssl
|
||||
import threading
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
|
||||
def do_GET(self):
|
||||
if self.path == '/video.html':
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
||||
self.end_headers()
|
||||
self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
|
||||
elif self.path == '/vid.mp4':
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', 'video/mp4')
|
||||
self.end_headers()
|
||||
self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
|
||||
else:
|
||||
assert False
|
||||
|
||||
|
||||
class FakeLogger(object):
|
||||
def debug(self, msg):
|
||||
pass
|
||||
|
||||
def warning(self, msg):
|
||||
pass
|
||||
|
||||
def error(self, msg):
|
||||
pass
|
||||
|
||||
|
||||
class TestHTTP(unittest.TestCase):
|
||||
def setUp(self):
|
||||
certfn = os.path.join(TEST_DIR, 'testcert.pem')
|
||||
self.httpd = compat_http_server.HTTPServer(
|
||||
('localhost', 0), HTTPTestRequestHandler)
|
||||
self.httpd.socket = ssl.wrap_socket(
|
||||
self.httpd.socket, certfile=certfn, server_side=True)
|
||||
self.port = self.httpd.socket.getsockname()[1]
|
||||
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
|
||||
self.server_thread.daemon = True
|
||||
self.server_thread.start()
|
||||
|
||||
def test_nocheckcertificate(self):
|
||||
if sys.version_info >= (2, 7, 9): # No certificate checking anyways
|
||||
ydl = YoutubeDL({'logger': FakeLogger()})
|
||||
self.assertRaises(
|
||||
Exception,
|
||||
ydl.extract_info, 'https://localhost:%d/video.html' % self.port)
|
||||
|
||||
ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
|
||||
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
|
||||
self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.jsinterp import JSInterpreter
|
||||
|
||||
|
||||
class TestJSInterpreter(unittest.TestCase):
|
||||
def test_basic(self):
|
||||
jsi = JSInterpreter('function x(){;}')
|
||||
self.assertEqual(jsi.call_function('x'), None)
|
||||
|
||||
jsi = JSInterpreter('function x3(){return 42;}')
|
||||
self.assertEqual(jsi.call_function('x3'), 42)
|
||||
|
||||
def test_calc(self):
|
||||
jsi = JSInterpreter('function x4(a){return 2*a+1;}')
|
||||
self.assertEqual(jsi.call_function('x4', 3), 7)
|
||||
|
||||
def test_empty_return(self):
|
||||
jsi = JSInterpreter('function f(){return; y()}')
|
||||
self.assertEqual(jsi.call_function('f'), None)
|
||||
|
||||
def test_morespace(self):
|
||||
jsi = JSInterpreter('function x (a) { return 2 * a + 1 ; }')
|
||||
self.assertEqual(jsi.call_function('x', 3), 7)
|
||||
|
||||
jsi = JSInterpreter('function f () { x = 2 ; return x; }')
|
||||
self.assertEqual(jsi.call_function('f'), 2)
|
||||
|
||||
def test_strange_chars(self):
|
||||
jsi = JSInterpreter('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }')
|
||||
self.assertEqual(jsi.call_function('$_xY1', 20), 21)
|
||||
|
||||
def test_operators(self):
|
||||
jsi = JSInterpreter('function f(){return 1 << 5;}')
|
||||
self.assertEqual(jsi.call_function('f'), 32)
|
||||
|
||||
jsi = JSInterpreter('function f(){return 19 & 21;}')
|
||||
self.assertEqual(jsi.call_function('f'), 17)
|
||||
|
||||
jsi = JSInterpreter('function f(){return 11 >> 2;}')
|
||||
self.assertEqual(jsi.call_function('f'), 2)
|
||||
|
||||
def test_array_access(self):
|
||||
jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2] = 7; return x;}')
|
||||
self.assertEqual(jsi.call_function('f'), [5, 2, 7])
|
||||
|
||||
def test_parens(self):
|
||||
jsi = JSInterpreter('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}')
|
||||
self.assertEqual(jsi.call_function('f'), 7)
|
||||
|
||||
jsi = JSInterpreter('function f(){return (1 + 2) * 3;}')
|
||||
self.assertEqual(jsi.call_function('f'), 9)
|
||||
|
||||
def test_assignments(self):
|
||||
jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}')
|
||||
self.assertEqual(jsi.call_function('f'), 31)
|
||||
|
||||
jsi = JSInterpreter('function f(){var x = 20; x += 30 + 1; return x;}')
|
||||
self.assertEqual(jsi.call_function('f'), 51)
|
||||
|
||||
jsi = JSInterpreter('function f(){var x = 20; x -= 30 + 1; return x;}')
|
||||
self.assertEqual(jsi.call_function('f'), -11)
|
||||
|
||||
def test_comments(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() {
|
||||
var x = /* 1 + */ 2;
|
||||
var y = /* 30
|
||||
* 40 */ 50;
|
||||
return x + y;
|
||||
}
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 52)
|
||||
|
||||
def test_precedence(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() {
|
||||
var a = [10, 20, 30, 40, 50];
|
||||
var b = 6;
|
||||
a[0]=a[b%a.length];
|
||||
return a;
|
||||
}''')
|
||||
self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -238,6 +238,8 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_duration('5 s'), 5)
|
||||
self.assertEqual(parse_duration('3 min'), 180)
|
||||
self.assertEqual(parse_duration('2.5 hours'), 9000)
|
||||
self.assertEqual(parse_duration('02:03:04'), 7384)
|
||||
self.assertEqual(parse_duration('01:02:03:04'), 93784)
|
||||
|
||||
def test_fix_xml_ampersands(self):
|
||||
self.assertEqual(
|
||||
@@ -371,6 +373,16 @@ class TestUtil(unittest.TestCase):
|
||||
on = js_to_json('{"abc": true}')
|
||||
self.assertEqual(json.loads(on), {'abc': True})
|
||||
|
||||
# Ignore JavaScript code as well
|
||||
on = js_to_json('''{
|
||||
"x": 1,
|
||||
y: "a",
|
||||
z: some.code
|
||||
}''')
|
||||
d = json.loads(on)
|
||||
self.assertEqual(d['x'], 1)
|
||||
self.assertEqual(d['y'], 'a')
|
||||
|
||||
def test_clean_html(self):
|
||||
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
||||
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
-----BEGIN PRIVATE KEY-----
|
||||
MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDMF0bAzaHAdIyB
|
||||
HRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaUYF1uTcNp
|
||||
Qx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQqO6BVg4+h
|
||||
A1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8A4CK58Ev
|
||||
mMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRhKxUhmw0J
|
||||
aobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/Mo83KyiP
|
||||
tKMCSQulAgMBAAECggEALCfBDAexPjU5DNoh6bIorUXxIJzxTNzNHCdvgbCGiA54
|
||||
BBKPh8s6qwazpnjT6WQWDIg/O5zZufqjE4wM9x4+0Zoqfib742ucJO9wY4way6x4
|
||||
Clt0xzbLPabB+MoZ4H7ip+9n2+dImhe7pGdYyOHoNYeOL57BBi1YFW42Hj6u/8pd
|
||||
63YCXisto3Rz1YvRQVjwsrS+cRKZlzAFQRviL30jav7Wh1aWEfcXxjj4zhm8pJdk
|
||||
ITGtq6howz57M0NtX6hZnfe8ywzTnDFIGKIMA2cYHuYJcBh9bc4tCGubTvTKK9UE
|
||||
8fM+f6UbfGqfpKCq1mcgs0XMoFDSzKS9+mSJn0+5JQKBgQD+OCKaeH3Yzw5zGnlw
|
||||
XuQfMJGNcgNr+ImjmvzUAC2fAZUJLAcQueE5kzMv5Fmd+EFE2CEX1Vit3tg0SXvA
|
||||
G+bq609doILHMA03JHnV1npO/YNIhG3AAtJlKYGxQNfWH9mflYj9mEui8ZFxG52o
|
||||
zWhHYuifOjjZszUR+/eio6NPzwKBgQDNhUBTrT8LIX4SE/EFUiTlYmWIvOMgXYvN
|
||||
8Cm3IRNQ/yyphZaXEU0eJzfX5uCDfSVOgd6YM/2pRah+t+1Hvey4H8e0GVTu5wMP
|
||||
gkkqwKPGIR1YOmlw6ippqwvoJD7LuYrm6Q4D6e1PvkjwCq6lEndrOPmPrrXNd0JJ
|
||||
XO60y3U2SwKBgQDLkyZarryQXxcCI6Q10Tc6pskYDMIit095PUbTeiUOXNT9GE28
|
||||
Hi32ziLCakk9kCysNasii81MxtQ54tJ/f5iGbNMMddnkKl2a19Hc5LjjAm4cJzg/
|
||||
98KGEhvyVqvAo5bBDZ06/rcrD+lZOzUglQS5jcIcqCIYa0LHWQ/wJLxFzwKBgFcZ
|
||||
1SRhdSmDfUmuF+S4ZpistflYjC3IV5rk4NkS9HvMWaJS0nqdw4A3AMzItXgkjq4S
|
||||
DkOVLTkTI5Do5HAWRv/VwC5M2hkR4NMu1VGAKSisGiKtRsirBWSZMEenLNHshbjN
|
||||
Jrpz5rZ4H7NT46ZkCCZyFBpX4gb9NyOedjA7Via3AoGARF8RxbYjnEGGFuhnbrJB
|
||||
FTPR0vaL4faY3lOgRZ8jOG9V2c9Hzi/y8a8TU4C11jnJSDqYCXBTd5XN28npYxtD
|
||||
pjRsCwy6ze+yvYXPO7C978eMG3YRyj366NXUxnXN59ibwe/lxi2OD9z8J1LEdF6z
|
||||
VJua1Wn8HKxnXMI61DhTCSo=
|
||||
-----END PRIVATE KEY-----
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIEEzCCAvugAwIBAgIJAK1haYi6gmSKMA0GCSqGSIb3DQEBCwUAMIGeMQswCQYD
|
||||
VQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEbMBkG
|
||||
A1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRsIHRl
|
||||
c3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhhZ0Bw
|
||||
aGloYWcuZGUwIBcNMTUwMTMwMDExNTA4WhgPMjExNTAxMDYwMTE1MDhaMIGeMQsw
|
||||
CQYDVQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEb
|
||||
MBkGA1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRs
|
||||
IHRlc3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhh
|
||||
Z0BwaGloYWcuZGUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDMF0bA
|
||||
zaHAdIyBHRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaU
|
||||
YF1uTcNpQx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQq
|
||||
O6BVg4+hA1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8
|
||||
A4CK58EvmMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRh
|
||||
KxUhmw0JaobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/
|
||||
Mo83KyiPtKMCSQulAgMBAAGjUDBOMB0GA1UdDgQWBBTBUZoqhQkzHQ6xNgZfFxOd
|
||||
ZEVt8TAfBgNVHSMEGDAWgBTBUZoqhQkzHQ6xNgZfFxOdZEVt8TAMBgNVHRMEBTAD
|
||||
AQH/MA0GCSqGSIb3DQEBCwUAA4IBAQCUOCl3T/J9B08Z+ijfOJAtkbUaEHuVZb4x
|
||||
5EpZSy2ZbkLvtsftMFieHVNXn9dDswQc5qjYStCC4o60LKw4M6Y63FRsAZ/DNaqb
|
||||
PY3jyCyuugZ8/sNf50vHYkAcF7SQYqOQFQX4TQsNUk2xMJIt7H0ErQFmkf/u3dg6
|
||||
cy89zkT462IwxzSG7NNhIlRkL9o5qg+Y1mF9eZA1B0rcL6hO24PPTHOd90HDChBu
|
||||
SZ6XMi/LzYQSTf0Vg2R+uMIVlzSlkdcZ6sqVnnqeLL8dFyIa4e9sj/D4ZCYP8Mqe
|
||||
Z73H5/NNhmwCHRqVUTgm307xblQaWGhwAiDkaRvRW2aJQ0qGEdZK
|
||||
-----END CERTIFICATE-----
|
||||
+12
-8
@@ -25,6 +25,7 @@ if os.name == 'nt':
|
||||
import ctypes
|
||||
|
||||
from .compat import (
|
||||
compat_basestring,
|
||||
compat_cookiejar,
|
||||
compat_expanduser,
|
||||
compat_http_client,
|
||||
@@ -543,6 +544,11 @@ class YoutubeDL(object):
|
||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||
tmpl = compat_expanduser(outtmpl)
|
||||
filename = tmpl % template_dict
|
||||
# Temporary fix for #4787
|
||||
# 'Treat' all problem characters by passing filename through preferredencoding
|
||||
# to workaround encoding issues with subprocess on python2 @ Windows
|
||||
if sys.version_info < (3, 0) and sys.platform == 'win32':
|
||||
filename = encodeFilename(filename, True).decode(preferredencoding())
|
||||
return filename
|
||||
except ValueError as err:
|
||||
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
|
||||
@@ -820,7 +826,7 @@ class YoutubeDL(object):
|
||||
'!=': operator.ne,
|
||||
}
|
||||
operator_rex = re.compile(r'''(?x)\s*\[
|
||||
(?P<key>width|height|tbr|abr|vbr|filesize)
|
||||
(?P<key>width|height|tbr|abr|vbr|filesize|fps)
|
||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
|
||||
\]$
|
||||
@@ -953,7 +959,7 @@ class YoutubeDL(object):
|
||||
if thumbnails is None:
|
||||
thumbnail = info_dict.get('thumbnail')
|
||||
if thumbnail:
|
||||
thumbnails = [{'url': thumbnail}]
|
||||
info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
|
||||
if thumbnails:
|
||||
thumbnails.sort(key=lambda t: (
|
||||
t.get('preference'), t.get('width'), t.get('height'),
|
||||
@@ -1069,7 +1075,8 @@ class YoutubeDL(object):
|
||||
selected_format = {
|
||||
'requested_formats': formats_info,
|
||||
'format': rf,
|
||||
'ext': formats_info[0]['ext'],
|
||||
'format_id': '%s+%s' % (formats_info[0].get('format_id'),
|
||||
formats_info[1].get('format_id')),
|
||||
'width': formats_info[0].get('width'),
|
||||
'height': formats_info[0].get('height'),
|
||||
'resolution': formats_info[0].get('resolution'),
|
||||
@@ -1130,7 +1137,7 @@ class YoutubeDL(object):
|
||||
|
||||
self._num_downloads += 1
|
||||
|
||||
filename = self.prepare_filename(info_dict)
|
||||
info_dict['_filename'] = filename = self.prepare_filename(info_dict)
|
||||
|
||||
# Forced printings
|
||||
if self.params.get('forcetitle', False):
|
||||
@@ -1155,10 +1162,7 @@ class YoutubeDL(object):
|
||||
if self.params.get('forceformat', False):
|
||||
self.to_stdout(info_dict['format'])
|
||||
if self.params.get('forcejson', False):
|
||||
info_dict['_filename'] = filename
|
||||
self.to_stdout(json.dumps(info_dict))
|
||||
if self.params.get('dump_single_json', False):
|
||||
info_dict['_filename'] = filename
|
||||
|
||||
# Do nothing else if in simulate mode
|
||||
if self.params.get('simulate', False):
|
||||
@@ -1555,7 +1559,7 @@ class YoutubeDL(object):
|
||||
# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
|
||||
# To work around aforementioned issue we will replace request's original URL with
|
||||
# percent-encoded one
|
||||
req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
|
||||
req_is_string = isinstance(req, compat_basestring)
|
||||
url = req if req_is_string else req.get_full_url()
|
||||
url_escaped = escape_url(url)
|
||||
|
||||
|
||||
@@ -361,7 +361,9 @@ def _real_main(argv=None):
|
||||
sys.exit()
|
||||
|
||||
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
|
||||
parser.error('you must provide at least one URL')
|
||||
parser.error(
|
||||
'You must provide at least one URL.\n'
|
||||
'Type youtube-dl --help to see a list of all options.')
|
||||
|
||||
try:
|
||||
if opts.load_info_filename is not None:
|
||||
|
||||
+28
-16
@@ -71,6 +71,11 @@ try:
|
||||
except ImportError:
|
||||
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
|
||||
|
||||
try:
|
||||
import http.server as compat_http_server
|
||||
except ImportError:
|
||||
import BaseHTTPServer as compat_http_server
|
||||
|
||||
try:
|
||||
from urllib.parse import unquote as compat_urllib_parse_unquote
|
||||
except ImportError:
|
||||
@@ -109,6 +114,26 @@ except ImportError:
|
||||
string += pct_sequence.decode(encoding, errors)
|
||||
return string
|
||||
|
||||
try:
|
||||
compat_str = unicode # Python 2
|
||||
except NameError:
|
||||
compat_str = str
|
||||
|
||||
try:
|
||||
compat_basestring = basestring # Python 2
|
||||
except NameError:
|
||||
compat_basestring = str
|
||||
|
||||
try:
|
||||
compat_chr = unichr # Python 2
|
||||
except NameError:
|
||||
compat_chr = chr
|
||||
|
||||
try:
|
||||
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
|
||||
except ImportError: # Python 2.6
|
||||
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
||||
|
||||
|
||||
try:
|
||||
from urllib.parse import parse_qs as compat_parse_qs
|
||||
@@ -118,7 +143,7 @@ except ImportError: # Python 2
|
||||
|
||||
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
|
||||
encoding='utf-8', errors='replace'):
|
||||
qs, _coerce_result = qs, unicode
|
||||
qs, _coerce_result = qs, compat_str
|
||||
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
|
||||
r = []
|
||||
for name_value in pairs:
|
||||
@@ -157,21 +182,6 @@ except ImportError: # Python 2
|
||||
parsed_result[name] = [value]
|
||||
return parsed_result
|
||||
|
||||
try:
|
||||
compat_str = unicode # Python 2
|
||||
except NameError:
|
||||
compat_str = str
|
||||
|
||||
try:
|
||||
compat_chr = unichr # Python 2
|
||||
except NameError:
|
||||
compat_chr = chr
|
||||
|
||||
try:
|
||||
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
|
||||
except ImportError: # Python 2.6
|
||||
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
||||
|
||||
try:
|
||||
from shlex import quote as shlex_quote
|
||||
except ImportError: # Python < 3.3
|
||||
@@ -357,6 +367,7 @@ def workaround_optparse_bug9161():
|
||||
|
||||
__all__ = [
|
||||
'compat_HTTPError',
|
||||
'compat_basestring',
|
||||
'compat_chr',
|
||||
'compat_cookiejar',
|
||||
'compat_expanduser',
|
||||
@@ -365,6 +376,7 @@ __all__ = [
|
||||
'compat_html_entities',
|
||||
'compat_html_parser',
|
||||
'compat_http_client',
|
||||
'compat_http_server',
|
||||
'compat_kwargs',
|
||||
'compat_ord',
|
||||
'compat_parse_qs',
|
||||
|
||||
@@ -45,6 +45,12 @@ class ExternalFD(FileDownloader):
|
||||
def supports(cls, info_dict):
|
||||
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
|
||||
|
||||
def _source_address(self, command_option):
|
||||
source_address = self.params.get('source_address')
|
||||
if source_address is None:
|
||||
return []
|
||||
return [command_option, source_address]
|
||||
|
||||
def _call_downloader(self, tmpfilename, info_dict):
|
||||
""" Either overwrite this or implement _make_cmd """
|
||||
cmd = self._make_cmd(tmpfilename, info_dict)
|
||||
@@ -72,6 +78,7 @@ class CurlFD(ExternalFD):
|
||||
cmd = [self.exe, '-o', tmpfilename]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._source_address('--interface')
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
@@ -81,6 +88,7 @@ class WgetFD(ExternalFD):
|
||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._source_address('--bind-address')
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
@@ -96,6 +104,7 @@ class Aria2cFD(ExternalFD):
|
||||
cmd += ['--out', os.path.basename(tmpfilename)]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._source_address('--interface')
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
|
||||
@@ -230,6 +230,23 @@ class F4mFD(FileDownloader):
|
||||
A downloader for f4m manifests or AdobeHDS.
|
||||
"""
|
||||
|
||||
def _get_unencrypted_media(self, doc):
|
||||
media = doc.findall(_add_ns('media'))
|
||||
if not media:
|
||||
self.report_error('No media found')
|
||||
for e in (doc.findall(_add_ns('drmAdditionalHeader')) +
|
||||
doc.findall(_add_ns('drmAdditionalHeaderSet'))):
|
||||
# If id attribute is missing it's valid for all media nodes
|
||||
# without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
|
||||
if 'id' not in e.attrib:
|
||||
self.report_error('Missing ID in f4m DRM')
|
||||
media = list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
|
||||
'drmAdditionalHeaderSetId' not in e.attrib,
|
||||
media))
|
||||
if not media:
|
||||
self.report_error('Unsupported DRM')
|
||||
return media
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
requested_bitrate = info_dict.get('tbr')
|
||||
@@ -248,7 +265,8 @@ class F4mFD(FileDownloader):
|
||||
)
|
||||
|
||||
doc = etree.fromstring(manifest)
|
||||
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
|
||||
formats = [(int(f.attrib.get('bitrate', -1)), f)
|
||||
for f in self._get_unencrypted_media(doc)]
|
||||
if requested_bitrate is None:
|
||||
# get the best format
|
||||
formats = sorted(formats, key=lambda f: f[0])
|
||||
|
||||
@@ -11,6 +11,7 @@ from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
encodeArgument,
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
@@ -21,23 +22,22 @@ class HlsFD(FileDownloader):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
args = [
|
||||
'-y', '-i', url, '-f', 'mp4', '-c', 'copy',
|
||||
'-bsf:a', 'aac_adtstoasc',
|
||||
encodeFilename(tmpfilename, for_subprocess=True)]
|
||||
|
||||
ffpp = FFmpegPostProcessor(downloader=self)
|
||||
program = ffpp._executable
|
||||
if program is None:
|
||||
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
||||
return False
|
||||
ffpp.check_version()
|
||||
cmd = [program] + args
|
||||
|
||||
retval = subprocess.call(cmd)
|
||||
args = [
|
||||
encodeArgument(opt)
|
||||
for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
|
||||
args.append(encodeFilename(tmpfilename, True))
|
||||
|
||||
retval = subprocess.call(args)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('\r[%s] %s bytes' % (cmd[0], fsize))
|
||||
self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
|
||||
@@ -3,6 +3,9 @@ from __future__ import unicode_literals
|
||||
import os
|
||||
import time
|
||||
|
||||
from socket import error as SocketError
|
||||
import errno
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
@@ -99,6 +102,11 @@ class HttpFD(FileDownloader):
|
||||
resume_len = 0
|
||||
open_mode = 'wb'
|
||||
break
|
||||
except SocketError as e:
|
||||
if e.errno != errno.ECONNRESET:
|
||||
# Connection reset is no problem, just retry
|
||||
raise
|
||||
|
||||
# Retry
|
||||
count += 1
|
||||
if count <= retries:
|
||||
|
||||
@@ -104,6 +104,7 @@ class RtmpFD(FileDownloader):
|
||||
live = info_dict.get('rtmp_live', False)
|
||||
conn = info_dict.get('rtmp_conn', None)
|
||||
protocol = info_dict.get('rtmp_protocol', None)
|
||||
real_time = info_dict.get('rtmp_real_time', False)
|
||||
no_resume = info_dict.get('no_resume', False)
|
||||
continue_dl = info_dict.get('continuedl', False)
|
||||
|
||||
@@ -143,6 +144,8 @@ class RtmpFD(FileDownloader):
|
||||
basic_args += ['--conn', conn]
|
||||
if protocol is not None:
|
||||
basic_args += ['--protocol', protocol]
|
||||
if real_time:
|
||||
basic_args += ['--realtime']
|
||||
|
||||
args = basic_args
|
||||
if not no_resume and continue_dl and not live:
|
||||
|
||||
@@ -82,6 +82,7 @@ from .crunchyroll import (
|
||||
CrunchyrollShowPlaylistIE
|
||||
)
|
||||
from .cspan import CSpanIE
|
||||
from .ctsnews import CtsNewsIE
|
||||
from .dailymotion import (
|
||||
DailymotionIE,
|
||||
DailymotionPlaylistIE,
|
||||
@@ -89,6 +90,7 @@ from .dailymotion import (
|
||||
)
|
||||
from .daum import DaumIE
|
||||
from .dbtv import DBTVIE
|
||||
from .dctp import DctpTvIE
|
||||
from .deezer import DeezerPlaylistIE
|
||||
from .dfb import DFBIE
|
||||
from .dotsub import DotsubIE
|
||||
@@ -180,6 +182,7 @@ from .heise import HeiseIE
|
||||
from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hornbunny import HornBunnyIE
|
||||
from .hostingbulk import HostingBulkIE
|
||||
@@ -284,9 +287,19 @@ from .netzkino import NetzkinoIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .newstube import NewstubeIE
|
||||
from .nextmedia import (
|
||||
NextMediaIE,
|
||||
NextMediaActionNewsIE,
|
||||
AppleDailyRealtimeNewsIE,
|
||||
AppleDailyAnimationNewsIE
|
||||
)
|
||||
from .nfb import NFBIE
|
||||
from .nfl import NFLIE
|
||||
from .nhl import NHLIE, NHLVideocenterIE
|
||||
from .nhl import (
|
||||
NHLIE,
|
||||
NHLNewsIE,
|
||||
NHLVideocenterIE,
|
||||
)
|
||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||
from .ninegag import NineGagIE
|
||||
from .noco import NocoIE
|
||||
@@ -304,7 +317,8 @@ from .nrk import (
|
||||
NRKIE,
|
||||
NRKTVIE,
|
||||
)
|
||||
from .ntv import NTVIE
|
||||
from .ntvde import NTVDeIE
|
||||
from .ntvru import NTVRuIE
|
||||
from .nytimes import NYTimesIE
|
||||
from .nuvid import NuvidIE
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
@@ -546,6 +560,7 @@ from .xminus import XMinusIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xvideos import XVideosIE
|
||||
from .xtube import XTubeUserIE, XTubeIE
|
||||
from .xuite import XuiteIE
|
||||
from .xxxymovies import XXXYMoviesIE
|
||||
from .yahoo import (
|
||||
YahooIE,
|
||||
|
||||
@@ -122,7 +122,6 @@ class AppleTrailersIE(InfoExtractor):
|
||||
playlist.append({
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
|
||||
@@ -23,13 +23,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||
'file': '22429276.mp4',
|
||||
'md5': '469751912f1de0816a9fc9df8336476c',
|
||||
'info_dict': {
|
||||
'title': 'Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?',
|
||||
'description': 'Das Erste Mediathek [ARD]: Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?, Anne Will, Über die Spionage-Affäre diskutieren Clemens Binninger, Katrin Göring-Eckardt, Georg Mascolo, Andrew B. Denison und Constanze Kurz.. Das Video zur Sendung Anne Will am Mittwoch, 16.07.2014',
|
||||
},
|
||||
'skip': 'Blocked outside of Germany',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',
|
||||
'info_dict': {
|
||||
|
||||
@@ -10,7 +10,7 @@ from ..compat import compat_HTTPError
|
||||
class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -118,6 +118,9 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
@@ -28,12 +28,10 @@ class CinchcastIE(InfoExtractor):
|
||||
item, './{http://developer.longtailvideo.com/trac/}date')
|
||||
upload_date = unified_strdate(date_str, day_first=False)
|
||||
# duration is present but wrong
|
||||
formats = []
|
||||
formats.append({
|
||||
formats = [{
|
||||
'format_id': 'main',
|
||||
'url': item.find(
|
||||
'./{http://search.yahoo.com/mrss/}content').attrib['url'],
|
||||
})
|
||||
'url': item.find('./{http://search.yahoo.com/mrss/}content').attrib['url'],
|
||||
}]
|
||||
backup_url = xpath_text(
|
||||
item, './{http://developer.longtailvideo.com/trac/}backupContent')
|
||||
if backup_url:
|
||||
|
||||
@@ -49,7 +49,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
||||
)|
|
||||
(?P<interview>
|
||||
extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
|
||||
extended-interviews/(?P<interID>[0-9a-z]+)/
|
||||
(?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?)
|
||||
(?:/[^/?#]?|[?#]|$))))
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
|
||||
@@ -62,6 +64,38 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
'uploader': 'thedailyshow',
|
||||
'title': 'thedailyshow kristen-stewart part 1',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview',
|
||||
'info_dict': {
|
||||
'id': 'sarah-chayes-extended-interview',
|
||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
||||
'title': 'thedailyshow Sarah Chayes Extended Interview',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '0baad492-cbec-4ec1-9e50-ad91c291127f',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150129',
|
||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
||||
'uploader': 'thedailyshow',
|
||||
'title': 'thedailyshow sarah-chayes-extended-interview part 1',
|
||||
},
|
||||
},
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150129',
|
||||
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
||||
'uploader': 'thedailyshow',
|
||||
'title': 'thedailyshow sarah-chayes-extended-interview part 2',
|
||||
},
|
||||
},
|
||||
],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
|
||||
'only_matching': True,
|
||||
@@ -230,6 +264,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': epTitle,
|
||||
'entries': entries,
|
||||
'title': show_name + ' ' + title,
|
||||
'description': description,
|
||||
|
||||
@@ -89,7 +89,8 @@ class InfoExtractor(object):
|
||||
* player_url SWF Player URL (used for rtmpdump).
|
||||
* protocol The protocol that will be used for the actual
|
||||
download, lower-case.
|
||||
"http", "https", "rtsp", "rtmp", "m3u8" or so.
|
||||
"http", "https", "rtsp", "rtmp", "rtmpe",
|
||||
"m3u8", or "m3u8_native".
|
||||
* preference Order number of this format. If this field is
|
||||
present and not None, the formats get sorted
|
||||
by this field, regardless of all other values.
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601, ExtractorError
|
||||
|
||||
|
||||
class CtsNewsIE(InfoExtractor):
|
||||
# https connection failed (Connection reset)
|
||||
_VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
|
||||
'md5': 'a9875cb790252b08431186d741beaabe',
|
||||
'info_dict': {
|
||||
'id': '201501291578109',
|
||||
'ext': 'mp4',
|
||||
'title': '以色列.真主黨交火 3人死亡',
|
||||
'description': 'md5:95e9b295c898b7ff294f09d450178d7d',
|
||||
'timestamp': 1422528540,
|
||||
'upload_date': '20150129',
|
||||
}
|
||||
}, {
|
||||
# News count not appear on page but still available in database
|
||||
'url': 'http://news.cts.com.tw/cts/international/201309/201309031304098.html',
|
||||
'md5': '3aee7e0df7cdff94e43581f54c22619e',
|
||||
'info_dict': {
|
||||
'id': '201309031304098',
|
||||
'ext': 'mp4',
|
||||
'title': '韓國31歲童顏男 貌如十多歲小孩',
|
||||
'description': 'md5:f183feeba3752b683827aab71adad584',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1378205880,
|
||||
'upload_date': '20130903',
|
||||
}
|
||||
}, {
|
||||
# With Youtube embedded video
|
||||
'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html',
|
||||
'md5': '1d842c771dc94c8c3bca5af2cc1db9c5',
|
||||
'add_ie': ['Youtube'],
|
||||
'info_dict': {
|
||||
'id': 'OVbfO7d0_hQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'iPhone6熱銷 蘋果財報亮眼',
|
||||
'description': 'md5:f395d4f485487bb0f992ed2c4b07aa7d',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'upload_date': '20150128',
|
||||
'uploader_id': 'TBSCTS',
|
||||
'uploader': '中華電視公司',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
news_id = self._match_id(url)
|
||||
page = self._download_webpage(url, news_id)
|
||||
|
||||
if self._search_regex(r'(CTSPlayer2)', page, 'CTSPlayer2 identifier', default=None):
|
||||
feed_url = self._html_search_regex(
|
||||
r'(http://news\.cts\.com\.tw/action/mp4feed\.php\?news_id=\d+)',
|
||||
page, 'feed url')
|
||||
video_url = self._download_webpage(
|
||||
feed_url, news_id, note='Fetching feed')
|
||||
else:
|
||||
self.to_screen('Not CTSPlayer video, trying Youtube...')
|
||||
youtube_url = self._search_regex(
|
||||
r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url',
|
||||
default=None)
|
||||
if not youtube_url:
|
||||
raise ExtractorError('The news includes no videos!', expected=True)
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': youtube_url,
|
||||
'ie_key': 'Youtube',
|
||||
}
|
||||
|
||||
description = self._html_search_meta('description', page)
|
||||
title = self._html_search_meta('title', page)
|
||||
thumbnail = self._html_search_meta('image', page)
|
||||
|
||||
datetime_str = self._html_search_regex(
|
||||
r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time')
|
||||
# Transform into ISO 8601 format with timezone info
|
||||
datetime_str = datetime_str.replace('/', '-') + ':00+0800'
|
||||
timestamp = parse_iso8601(datetime_str, delimiter=' ')
|
||||
|
||||
return {
|
||||
'id': news_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
|
||||
|
||||
class DctpTvIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$'
|
||||
_TEST = {
|
||||
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
|
||||
'info_dict': {
|
||||
'id': '1324',
|
||||
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
|
||||
'ext': 'flv',
|
||||
'title': 'Videoinstallation für eine Kaufhausfassade'
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/'
|
||||
version_json = self._download_json(
|
||||
base_url + 'version.json',
|
||||
video_id, note='Determining file version')
|
||||
version = version_json['version_name']
|
||||
info_json = self._download_json(
|
||||
'{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id),
|
||||
video_id, note='Fetching object ID')
|
||||
object_id = compat_str(info_json['object_id'])
|
||||
meta_json = self._download_json(
|
||||
'{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id),
|
||||
video_id, note='Downloading metadata')
|
||||
uuid = meta_json['uuid']
|
||||
title = meta_json['title']
|
||||
wide = meta_json['is_wide']
|
||||
if wide:
|
||||
ratio = '16x9'
|
||||
else:
|
||||
ratio = '4x3'
|
||||
play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio)
|
||||
|
||||
servers_json = self._download_json(
|
||||
'http://www.dctp.tv/streaming_servers/',
|
||||
video_id, note='Downloading server list')
|
||||
url = servers_json[0]['endpoint']
|
||||
|
||||
return {
|
||||
'id': object_id,
|
||||
'title': title,
|
||||
'format': 'rtmp',
|
||||
'url': url,
|
||||
'play_path': play_path,
|
||||
'rtmp_real_time': True,
|
||||
'ext': 'flv',
|
||||
'display_id': video_id
|
||||
}
|
||||
@@ -1,40 +1,38 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class DefenseGouvFrIE(InfoExtractor):
|
||||
IE_NAME = 'defense.gouv.fr'
|
||||
_VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
|
||||
r'ligthboxvideo/base-de-medias/webtv/(.*)')
|
||||
_VALID_URL = r'http://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
|
||||
'file': '11213.mp4',
|
||||
'md5': '75bba6124da7e63d2d60b5244ec9430c',
|
||||
"info_dict": {
|
||||
"title": "attaque-chimique-syrienne-du-21-aout-2013-1"
|
||||
'info_dict': {
|
||||
'id': '11213',
|
||||
'ext': 'mp4',
|
||||
'title': 'attaque-chimique-syrienne-du-21-aout-2013-1'
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
title = re.match(self._VALID_URL, url).group(1)
|
||||
title = self._match_id(url)
|
||||
webpage = self._download_webpage(url, title)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r"flashvars.pvg_id=\"(\d+)\";",
|
||||
webpage, 'ID')
|
||||
|
||||
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
|
||||
+ video_id)
|
||||
info = self._download_webpage(json_url, title,
|
||||
'Downloading JSON config')
|
||||
video_url = json.loads(info)['renditions'][0]['url']
|
||||
info = self._download_json(json_url, title, 'Downloading JSON config')
|
||||
video_url = info['renditions'][0]['url']
|
||||
|
||||
return {'id': video_id,
|
||||
'ext': 'mp4',
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
}
|
||||
return {
|
||||
'id': video_id,
|
||||
'ext': 'mp4',
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ from ..utils import parse_iso8601
|
||||
|
||||
|
||||
class DRTVIE(SubtitlesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)+(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
|
||||
@@ -25,9 +25,15 @@ class DRTVIE(SubtitlesInfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
programcard = self._download_json(
|
||||
'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-(?:material-identifier|episode-slug)="([^"]+)"',
|
||||
webpage, 'video id')
|
||||
|
||||
programcard = self._download_json(
|
||||
'http://www.dr.dk/mu/programcard/expanded/%s' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
data = programcard['Data'][0]
|
||||
|
||||
title = data['Title']
|
||||
|
||||
@@ -230,12 +230,13 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
|
||||
class GenerationQuoiIE(InfoExtractor):
|
||||
IE_NAME = 'france2.fr:generation-quoi'
|
||||
_VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'
|
||||
_VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
|
||||
'file': 'k7FJX8VBcvvLmX4wA5Q.mp4',
|
||||
'info_dict': {
|
||||
'id': 'k7FJX8VBcvvLmX4wA5Q',
|
||||
'ext': 'mp4',
|
||||
'title': 'Génération Quoi - Garde à Vous',
|
||||
'uploader': 'Génération Quoi',
|
||||
},
|
||||
@@ -243,14 +244,12 @@ class GenerationQuoiIE(InfoExtractor):
|
||||
# It uses Dailymotion
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Only available from France',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name)
|
||||
info_json = self._download_webpage(info_url, name)
|
||||
display_id = self._match_id(url)
|
||||
info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % display_id)
|
||||
info_json = self._download_webpage(info_url, display_id)
|
||||
info = json.loads(info_json)
|
||||
return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
|
||||
ie='Dailymotion')
|
||||
|
||||
@@ -498,6 +498,19 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': 'www.abc.net.au',
|
||||
'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
|
||||
}
|
||||
},
|
||||
# embedded viddler video
|
||||
{
|
||||
'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
|
||||
'info_dict': {
|
||||
'id': '4d03aad9',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'deadspin',
|
||||
'title': 'WALL-TO-GORTAT',
|
||||
'timestamp': 1422285291,
|
||||
'upload_date': '20150126',
|
||||
},
|
||||
'add_ie': ['Viddler'],
|
||||
}
|
||||
]
|
||||
|
||||
@@ -860,9 +873,16 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for embedded Viddler player
|
||||
mobj = re.search(
|
||||
r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for Ooyala videos
|
||||
mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||
re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
||||
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
||||
if mobj is not None:
|
||||
return OoyalaIE._build_url_result(mobj.group('ec'))
|
||||
|
||||
@@ -1053,7 +1073,7 @@ class GenericIE(InfoExtractor):
|
||||
found = filter_video(re.findall(r'''(?xs)
|
||||
flowplayer\("[^"]+",\s*
|
||||
\{[^}]+?\}\s*,
|
||||
\s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
|
||||
\s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
|
||||
["']?url["']?\s*:\s*["']([^"']+)["']
|
||||
''', webpage))
|
||||
if not found:
|
||||
|
||||
@@ -70,6 +70,19 @@ class GloboIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
|
||||
'md5': 'c1defca721ce25b2354e927d3e4b3dec',
|
||||
'info_dict': {
|
||||
'id': '3928201',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ator e diretor argentino, Ricado Darín fala sobre utopias e suas perdas',
|
||||
'duration': 1472.906,
|
||||
'uploader': 'Canal Brasil',
|
||||
'uploader_id': 705,
|
||||
'like_count': int,
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
class MD5():
|
||||
@@ -381,11 +394,16 @@ class GloboIE(InfoExtractor):
|
||||
signed_md5 = self.MD5.b64_md5(received_md5 + compat_str(sign_time) + padding)
|
||||
signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5
|
||||
|
||||
formats.append({
|
||||
'url': '%s?h=%s&k=%s' % (resource['url'], signed_hash, 'flash'),
|
||||
'format_id': resource_id,
|
||||
'height': resource['height']
|
||||
})
|
||||
resource_url = resource['url']
|
||||
signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
|
||||
if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(signed_url, resource_id, 'mp4'))
|
||||
else:
|
||||
formats.append({
|
||||
'url': signed_url,
|
||||
'format_id': resource_id,
|
||||
'height': resource.get('height'),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
||||
@@ -83,7 +83,7 @@ class GroovesharkIE(InfoExtractor):
|
||||
return compat_urlparse.urlunparse((uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None))
|
||||
|
||||
def _transform_bootstrap(self, js):
|
||||
return re.split('(?m)^\s*try\s*{', js)[0] \
|
||||
return re.split('(?m)^\s*try\s*\{', js)[0] \
|
||||
.split(' = ', 1)[1].strip().rstrip(';')
|
||||
|
||||
def _transform_meta(self, js):
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class HistoricFilmsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?historicfilms\.com/(?:tapes/|play)(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.historicfilms.com/tapes/4728',
|
||||
'md5': 'd4a437aec45d8d796a38a215db064e9a',
|
||||
'info_dict': {
|
||||
'id': '4728',
|
||||
'ext': 'mov',
|
||||
'title': 'Historic Films: GP-7',
|
||||
'description': 'md5:1a86a0f3ac54024e419aba97210d959a',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 2096,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
tape_id = self._search_regex(
|
||||
r'class="tapeId">([^<]+)<', webpage, 'tape id')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._html_search_meta(
|
||||
'thumbnailUrl', webpage, 'thumbnails') or self._og_search_thumbnail(webpage)
|
||||
duration = parse_duration(self._html_search_meta(
|
||||
'duration', webpage, 'duration'))
|
||||
|
||||
video_url = 'http://www.historicfilms.com/video/%s_%s_web.mov' % (tape_id, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
}
|
||||
+16
-11
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
class IviIE(InfoExtractor):
|
||||
IE_DESC = 'ivi.ru'
|
||||
IE_NAME = 'ivi'
|
||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<videoid>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
# Single movie
|
||||
@@ -63,29 +63,34 @@ class IviIE(InfoExtractor):
|
||||
return int(m.group('commentcount')) if m is not None else 0
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
api_url = 'http://api.digitalaccess.ru/api/json/'
|
||||
|
||||
data = {'method': 'da.content.get',
|
||||
'params': [video_id, {'site': 's183',
|
||||
'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
|
||||
'contentid': video_id
|
||||
}
|
||||
]
|
||||
data = {
|
||||
'method': 'da.content.get',
|
||||
'params': [
|
||||
video_id, {
|
||||
'site': 's183',
|
||||
'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
|
||||
'contentid': video_id
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(api_url, json.dumps(data))
|
||||
|
||||
video_json_page = self._download_webpage(request, video_id, 'Downloading video JSON')
|
||||
video_json_page = self._download_webpage(
|
||||
request, video_id, 'Downloading video JSON')
|
||||
video_json = json.loads(video_json_page)
|
||||
|
||||
if 'error' in video_json:
|
||||
error = video_json['error']
|
||||
if error['origin'] == 'NoRedisValidData':
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
raise ExtractorError('Unable to download video %s: %s' % (video_id, error['message']), expected=True)
|
||||
raise ExtractorError(
|
||||
'Unable to download video %s: %s' % (video_id, error['message']),
|
||||
expected=True)
|
||||
|
||||
result = video_json['result']
|
||||
|
||||
|
||||
@@ -13,17 +13,17 @@ class KankanIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
|
||||
'file': '48863.flv',
|
||||
'md5': '29aca1e47ae68fc28804aca89f29507e',
|
||||
'info_dict': {
|
||||
'id': '48863',
|
||||
'ext': 'flv',
|
||||
'title': 'Ready To Go',
|
||||
},
|
||||
'skip': 'Only available from China',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title')
|
||||
|
||||
@@ -7,10 +7,6 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_decrypt_text
|
||||
)
|
||||
|
||||
|
||||
@@ -18,9 +14,10 @@ class KeezMoviesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'
|
||||
_TEST = {
|
||||
'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
|
||||
'file': '1214711.mp4',
|
||||
'md5': '6e297b7e789329923fcf83abb67c9289',
|
||||
'info_dict': {
|
||||
'id': '1214711',
|
||||
'ext': 'mp4',
|
||||
'title': 'Petite Asian Lady Mai Playing In Bathtub',
|
||||
'age_limit': 18,
|
||||
}
|
||||
@@ -39,11 +36,10 @@ class KeezMoviesIE(InfoExtractor):
|
||||
embedded_url = mobj.group(1)
|
||||
return self.url_result(embedded_url)
|
||||
|
||||
video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, 'title')
|
||||
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, 'video_url'))
|
||||
if 'encrypted=true' in webpage:
|
||||
password = self._html_search_regex(r'video_title=(.+?)&', webpage, 'password')
|
||||
video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
|
||||
video_title = self._html_search_regex(
|
||||
r'<h1 [^>]*>([^<]+)', webpage, 'title')
|
||||
video_url = self._html_search_regex(
|
||||
r'(?s)html5VideoPlayer = .*?src="([^"]+)"', webpage, 'video URL')
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
extension = os.path.splitext(path)[1][1:]
|
||||
format = path.split('/')[4].split('_')[:2]
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
@@ -20,9 +18,10 @@ class LA7IE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.la7.tv/richplayer/?assetid=50355319',
|
||||
'file': '50355319.mp4',
|
||||
'md5': 'ec7d1f0224d20ba293ab56cf2259651f',
|
||||
'info_dict': {
|
||||
'id': '50355319',
|
||||
'ext': 'mp4',
|
||||
'title': 'IL DIVO',
|
||||
'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci',
|
||||
'duration': 6254,
|
||||
@@ -31,9 +30,7 @@ class LA7IE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id
|
||||
doc = self._download_xml(xml_url, video_id)
|
||||
|
||||
|
||||
@@ -6,13 +6,12 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class LnkGoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi\-video/(?P<show>[^/]+)/ziurek\-(?P<display_id>[A-Za-z0-9\-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162',
|
||||
'info_dict': {
|
||||
@@ -51,8 +50,7 @@ class LnkGoIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, display_id, 'Downloading player webpage')
|
||||
@@ -61,6 +59,8 @@ class LnkGoIE(InfoExtractor):
|
||||
r'data-ep="([^"]+)"', webpage, 'video ID')
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'class="[^"]*meta-item[^"]*air-time[^"]*">.*?<strong>([^<]+)</strong>', webpage, 'upload date', fatal=False))
|
||||
|
||||
thumbnail_w = int_or_none(
|
||||
self._og_search_property('image:width', webpage, 'thumbnail width', fatal=False))
|
||||
@@ -75,39 +75,28 @@ class LnkGoIE(InfoExtractor):
|
||||
'height': thumbnail_h,
|
||||
})
|
||||
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'class="meta-item\sair-time">.*?<strong>([^<]+)</strong>', webpage, 'upload date', fatal=False))
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'VideoDuration = "([^"]+)"', webpage, 'duration', fatal=False))
|
||||
config = self._parse_json(self._search_regex(
|
||||
r'episodePlayer\((\{.*?\}),\s*\{', webpage, 'sources'), video_id)
|
||||
|
||||
pg_rating = self._search_regex(
|
||||
r'pgrating="([^"]+)"', webpage, 'PG rating', fatal=False, default='')
|
||||
age_limit = self._AGE_LIMITS.get(pg_rating.upper(), 0)
|
||||
if config.get('pGeo'):
|
||||
self.report_warning(
|
||||
'This content might not be available in your country due to copyright reasons')
|
||||
|
||||
sources_js = self._search_regex(
|
||||
r'(?s)sources:\s(\[.*?\]),', webpage, 'sources')
|
||||
sources = self._parse_json(
|
||||
sources_js, video_id, transform_source=js_to_json)
|
||||
formats = [{
|
||||
'format_id': 'hls',
|
||||
'ext': 'mp4',
|
||||
'url': config['EpisodeVideoLink_HLS'],
|
||||
}]
|
||||
|
||||
formats = []
|
||||
for source in sources:
|
||||
if source.get('provider') == 'rtmp':
|
||||
m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', source['file'])
|
||||
if not m:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': 'rtmp',
|
||||
'ext': 'flv',
|
||||
'url': m.group('url'),
|
||||
'play_path': m.group('play_path'),
|
||||
'page_url': url,
|
||||
})
|
||||
elif source.get('file').endswith('.m3u8'):
|
||||
formats.append({
|
||||
'format_id': 'hls',
|
||||
'ext': source.get('type', 'mp4'),
|
||||
'url': source['file'],
|
||||
})
|
||||
m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', config['EpisodeVideoLink'])
|
||||
if m:
|
||||
formats.append({
|
||||
'format_id': 'rtmp',
|
||||
'ext': 'flv',
|
||||
'url': m.group('url'),
|
||||
'play_path': m.group('play_path'),
|
||||
'page_url': url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -117,8 +106,8 @@ class LnkGoIE(InfoExtractor):
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnails': [thumbnail],
|
||||
'duration': duration,
|
||||
'duration': int_or_none(config.get('VideoTime')),
|
||||
'description': description,
|
||||
'age_limit': age_limit,
|
||||
'age_limit': self._AGE_LIMITS.get(config.get('PGRating'), 0),
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
@@ -13,21 +11,22 @@ class MacGameStoreIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450',
|
||||
'file': '2450.m4v',
|
||||
'md5': '8649b8ea684b6666b4c5be736ecddc61',
|
||||
'info_dict': {
|
||||
'id': '2450',
|
||||
'ext': 'm4v',
|
||||
'title': 'Crow',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, 'Downloading trailer page')
|
||||
|
||||
webpage = self._download_webpage(url, video_id, 'Downloading trailer page')
|
||||
|
||||
if re.search(r'>Missing Media<', webpage) is not None:
|
||||
raise ExtractorError('Trailer %s does not exist' % video_id, expected=True)
|
||||
if '>Missing Media<' in webpage:
|
||||
raise ExtractorError(
|
||||
'Trailer %s does not exist' % video_id, expected=True)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title')
|
||||
|
||||
@@ -9,7 +9,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
@@ -85,15 +85,17 @@ class MixcloudIE(InfoExtractor):
|
||||
uploader_id = self._search_regex(
|
||||
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
||||
description = self._og_search_description(webpage)
|
||||
like_count = int_or_none(self._search_regex(
|
||||
r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
|
||||
like_count = str_to_int(self._search_regex(
|
||||
[r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
|
||||
r'/favorites/?">([0-9]+)<'],
|
||||
webpage, 'like count', fatal=False))
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||
view_count = str_to_int(self._search_regex(
|
||||
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||
r'/listeners/?">([0-9,.]+)</a>'],
|
||||
webpage, 'play count', fatal=False))
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
r'<time itemprop="dateCreated" datetime="([^"]+)">',
|
||||
webpage, 'upload date'))
|
||||
webpage, 'upload date', default=None))
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
|
||||
@@ -1,21 +1,19 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class MporaIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
|
||||
_VALID_URL = r'https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
|
||||
IE_NAME = 'MPORA'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de',
|
||||
'file': 'AAdo8okx4wiz.mp4',
|
||||
'md5': 'a7a228473eedd3be741397cf452932eb',
|
||||
'info_dict': {
|
||||
'id': 'AAdo8okx4wiz',
|
||||
'ext': 'mp4',
|
||||
'title': 'Katy Curd - Winter in the Forest',
|
||||
'duration': 416,
|
||||
'uploader': 'Peter Newman Media',
|
||||
@@ -23,14 +21,12 @@ class MporaIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data_json = self._search_regex(
|
||||
r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
|
||||
|
||||
data = json.loads(data_json)
|
||||
data = self._parse_json(data_json, video_id)
|
||||
|
||||
uploader = data['info_overlay'].get('username')
|
||||
duration = data['video']['duration'] // 1000
|
||||
|
||||
+44
-24
@@ -2,10 +2,11 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -22,7 +23,7 @@ def _media_xml_tag(tag):
|
||||
return '{http://search.yahoo.com/mrss/}%s' % tag
|
||||
|
||||
|
||||
class MTVServicesInfoExtractor(InfoExtractor):
|
||||
class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
|
||||
_MOBILE_TEMPLATE = None
|
||||
|
||||
@staticmethod
|
||||
@@ -78,17 +79,42 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
try:
|
||||
_, _, ext = rendition.attrib['type'].partition('/')
|
||||
rtmp_video_url = rendition.find('./src').text
|
||||
formats.append({'ext': ext,
|
||||
'url': self._transform_rtmp_url(rtmp_video_url),
|
||||
'format_id': rendition.get('bitrate'),
|
||||
'width': int(rendition.get('width')),
|
||||
'height': int(rendition.get('height')),
|
||||
})
|
||||
if rtmp_video_url.endswith('siteunavail.png'):
|
||||
continue
|
||||
formats.append({
|
||||
'ext': ext,
|
||||
'url': self._transform_rtmp_url(rtmp_video_url),
|
||||
'format_id': rendition.get('bitrate'),
|
||||
'width': int(rendition.get('width')),
|
||||
'height': int(rendition.get('height')),
|
||||
})
|
||||
except (KeyError, TypeError):
|
||||
raise ExtractorError('Invalid rendition field.')
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def _extract_subtitles(self, mdoc, mtvn_id):
|
||||
subtitles = {}
|
||||
FORMATS = {
|
||||
'scc': 'cea-608',
|
||||
'eia-608': 'cea-608',
|
||||
'xml': 'ttml',
|
||||
}
|
||||
subtitles_format = FORMATS.get(
|
||||
self._downloader.params.get('subtitlesformat'), 'ttml')
|
||||
for transcript in mdoc.findall('.//transcript'):
|
||||
if transcript.get('kind') != 'captions':
|
||||
continue
|
||||
lang = transcript.get('srclang')
|
||||
for typographic in transcript.findall('./typographic'):
|
||||
captions_format = typographic.get('format')
|
||||
if captions_format == subtitles_format:
|
||||
subtitles[lang] = compat_str(typographic.get('src'))
|
||||
break
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(mtvn_id, subtitles)
|
||||
return self.extract_subtitles(mtvn_id, subtitles)
|
||||
|
||||
def _get_video_info(self, itemdoc):
|
||||
uri = itemdoc.find('guid').text
|
||||
video_id = self._id_from_uri(uri)
|
||||
@@ -135,6 +161,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
return {
|
||||
'title': title,
|
||||
'formats': self._extract_video_formats(mediagen_doc, mtvn_id),
|
||||
'subtitles': self._extract_subtitles(mediagen_doc, mtvn_id),
|
||||
'id': video_id,
|
||||
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
|
||||
'description': description,
|
||||
@@ -167,7 +194,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
mgid = self._search_regex(
|
||||
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
|
||||
webpage, 'mgid')
|
||||
return self._get_videos_info(mgid)
|
||||
|
||||
videos_info = self._get_videos_info(mgid)
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
return
|
||||
return videos_info
|
||||
|
||||
|
||||
class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
||||
@@ -212,25 +243,14 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
|
||||
'file': '853555.mp4',
|
||||
'md5': '850f3f143316b1e71fa56a4edfd6e0f8',
|
||||
'info_dict': {
|
||||
'id': '853555',
|
||||
'ext': 'mp4',
|
||||
'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
|
||||
'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
|
||||
},
|
||||
},
|
||||
{
|
||||
'add_ie': ['Vevo'],
|
||||
'url': 'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
|
||||
'file': 'USCJY1331283.mp4',
|
||||
'md5': '73b4e7fcadd88929292fe52c3ced8caf',
|
||||
'info_dict': {
|
||||
'title': 'Everything Has Changed',
|
||||
'upload_date': '20130606',
|
||||
'uploader': 'Taylor Swift',
|
||||
},
|
||||
'skip': 'VEVO is only available in some countries',
|
||||
},
|
||||
]
|
||||
|
||||
def _get_thumbnail_url(self, uri, itemdoc):
|
||||
@@ -244,8 +264,8 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# Some videos come from Vevo.com
|
||||
m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
|
||||
webpage, re.DOTALL)
|
||||
m_vevo = re.search(
|
||||
r'(?s)isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage)
|
||||
if m_vevo:
|
||||
vevo_id = m_vevo.group(1)
|
||||
self.to_screen('Vevo video detected: %s' % vevo_id)
|
||||
|
||||
@@ -11,6 +11,7 @@ class NerdCubedFeedIE(InfoExtractor):
|
||||
_TEST = {
|
||||
'url': 'http://www.nerdcubed.co.uk/feed.json',
|
||||
'info_dict': {
|
||||
'id': 'nerdcubed-feed',
|
||||
'title': 'nerdcubed.co.uk feed',
|
||||
},
|
||||
'playlist_mincount': 1300,
|
||||
|
||||
@@ -0,0 +1,163 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
|
||||
|
||||
class NextMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'http://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
|
||||
'md5': 'dff9fad7009311c421176d1ac90bfe4f',
|
||||
'info_dict': {
|
||||
'id': '53109199',
|
||||
'ext': 'mp4',
|
||||
'title': '【佔領金鐘】50外國領事議員撐場 讚學生勇敢香港有希望',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:28222b9912b6665a21011b034c70fcc7',
|
||||
'timestamp': 1415456273,
|
||||
'upload_date': '20141108',
|
||||
}
|
||||
}]
|
||||
|
||||
_URL_PATTERN = r'\{ url: \'(.+)\' \}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
news_id = self._match_id(url)
|
||||
page = self._download_webpage(url, news_id)
|
||||
return self._extract_from_nextmedia_page(news_id, url, page)
|
||||
|
||||
def _extract_from_nextmedia_page(self, news_id, url, page):
|
||||
title = self._fetch_title(page)
|
||||
video_url = self._search_regex(self._URL_PATTERN, page, 'video url')
|
||||
|
||||
attrs = {
|
||||
'id': news_id,
|
||||
'title': title,
|
||||
'url': video_url, # ext can be inferred from url
|
||||
'thumbnail': self._fetch_thumbnail(page),
|
||||
'description': self._fetch_description(page),
|
||||
}
|
||||
|
||||
timestamp = self._fetch_timestamp(page)
|
||||
if timestamp:
|
||||
attrs['timestamp'] = timestamp
|
||||
else:
|
||||
attrs['upload_date'] = self._fetch_upload_date(url)
|
||||
|
||||
return attrs
|
||||
|
||||
def _fetch_title(self, page):
|
||||
return self._og_search_title(page)
|
||||
|
||||
def _fetch_thumbnail(self, page):
|
||||
return self._og_search_thumbnail(page)
|
||||
|
||||
def _fetch_timestamp(self, page):
|
||||
dateCreated = self._search_regex('"dateCreated":"([^"]+)"', page, 'created time')
|
||||
return parse_iso8601(dateCreated)
|
||||
|
||||
def _fetch_upload_date(self, url):
|
||||
return self._search_regex(self._VALID_URL, url, 'upload date', group='date')
|
||||
|
||||
def _fetch_description(self, page):
|
||||
return self._og_search_property('description', page)
|
||||
|
||||
|
||||
class NextMediaActionNewsIE(NextMediaIE):
|
||||
_VALID_URL = r'http://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
|
||||
_TESTS = [{
|
||||
'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
|
||||
'md5': '05fce8ffeed7a5e00665d4b7cf0f9201',
|
||||
'info_dict': {
|
||||
'id': '19009428',
|
||||
'ext': 'mp4',
|
||||
'title': '【壹週刊】細10年男友偷食 50歲邵美琪再失戀',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:cd802fad1f40fd9ea178c1e2af02d659',
|
||||
'timestamp': 1421791200,
|
||||
'upload_date': '20150120',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
news_id = self._match_id(url)
|
||||
actionnews_page = self._download_webpage(url, news_id)
|
||||
article_url = self._og_search_url(actionnews_page)
|
||||
article_page = self._download_webpage(article_url, news_id)
|
||||
return self._extract_from_nextmedia_page(news_id, url, article_page)
|
||||
|
||||
|
||||
class AppleDailyRealtimeNewsIE(NextMediaIE):
|
||||
_VALID_URL = r'http://(www|ent).appledaily.com.tw/(realtimenews|enews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
|
||||
_TESTS = [{
|
||||
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
|
||||
'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
|
||||
'info_dict': {
|
||||
'id': '36354694',
|
||||
'ext': 'mp4',
|
||||
'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:b23787119933404ce515c6356a8c355c',
|
||||
'upload_date': '20150128',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.appledaily.com.tw/realtimenews/article/strange/20150128/550549/%E4%B8%8D%E6%BB%BF%E8%A2%AB%E8%B8%A9%E8%85%B3%E3%80%80%E5%B1%B1%E6%9D%B1%E5%85%A9%E5%A4%A7%E5%AA%BD%E4%B8%80%E8%B7%AF%E6%89%93%E4%B8%8B%E8%BB%8A',
|
||||
'md5': '86b4e9132d158279c7883822d94ccc49',
|
||||
'info_dict': {
|
||||
'id': '550549',
|
||||
'ext': 'mp4',
|
||||
'title': '不滿被踩腳 山東兩大媽一路打下車',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:2648aaf6fc4f401f6de35a91d111aa1d',
|
||||
'upload_date': '20150128',
|
||||
}
|
||||
}]
|
||||
|
||||
_URL_PATTERN = r'\{url: \'(.+)\'\}'
|
||||
|
||||
def _fetch_title(self, page):
|
||||
return self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title')
|
||||
|
||||
def _fetch_thumbnail(self, page):
|
||||
return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)
|
||||
|
||||
def _fetch_timestamp(self, page):
|
||||
return None
|
||||
|
||||
|
||||
class AppleDailyAnimationNewsIE(AppleDailyRealtimeNewsIE):
|
||||
_VALID_URL = 'http://www.appledaily.com.tw/animation/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671',
|
||||
'md5': '03df296d95dedc2d5886debbb80cb43f',
|
||||
'info_dict': {
|
||||
'id': '5003671',
|
||||
'ext': 'mp4',
|
||||
'title': '20正妹熱舞 《刀龍傳說Online》火辣上市',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:23c0aac567dc08c9c16a3161a2c2e3cd',
|
||||
'upload_date': '20150128',
|
||||
}
|
||||
}, {
|
||||
# No thumbnail
|
||||
'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003673/',
|
||||
'md5': 'b06182cd386ea7bc6115ec7ff0f72aeb',
|
||||
'info_dict': {
|
||||
'id': '5003673',
|
||||
'ext': 'mp4',
|
||||
'title': '半夜尿尿 好像會看到___',
|
||||
'description': 'md5:61d2da7fe117fede148706cdb85ac066',
|
||||
'upload_date': '20150128',
|
||||
},
|
||||
'expected_warnings': [
|
||||
'video thumbnail',
|
||||
]
|
||||
}]
|
||||
|
||||
def _fetch_title(self, page):
|
||||
return self._html_search_meta('description', page, 'news title')
|
||||
|
||||
def _fetch_description(self, page):
|
||||
return self._html_search_meta('description', page, 'news description')
|
||||
@@ -20,6 +20,12 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
||||
def _fix_json(json_string):
|
||||
return json_string.replace('\\\'', '\'')
|
||||
|
||||
def _real_extract_video(self, video_id):
|
||||
json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
|
||||
data = self._download_json(
|
||||
json_url, video_id, transform_source=self._fix_json)
|
||||
return self._extract_video(data[0])
|
||||
|
||||
def _extract_video(self, info):
|
||||
video_id = info['id']
|
||||
self.report_extraction(video_id)
|
||||
@@ -54,7 +60,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
class NHLIE(NHLBaseInfoExtractor):
|
||||
IE_NAME = 'nhl.com'
|
||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
|
||||
@@ -92,15 +98,41 @@ class NHLIE(NHLBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://video.nhl.com/videocenter/?id=736722',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
|
||||
data = self._download_json(
|
||||
json_url, video_id, transform_source=self._fix_json)
|
||||
return self._extract_video(data[0])
|
||||
video_id = self._match_id(url)
|
||||
return self._real_extract_video(video_id)
|
||||
|
||||
|
||||
class NHLNewsIE(NHLBaseInfoExtractor):
|
||||
IE_NAME = 'nhl.com:news'
|
||||
IE_DESC = 'NHL news'
|
||||
_VALID_URL = r'https?://(?:www\.)?nhl\.com/ice/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.nhl.com/ice/news.htm?id=750727',
|
||||
'md5': '4b3d1262e177687a3009937bd9ec0be8',
|
||||
'info_dict': {
|
||||
'id': '736722',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cal Clutterbuck has been fined $2,000',
|
||||
'description': 'md5:45fe547d30edab88b23e0dd0ab1ed9e6',
|
||||
'duration': 37,
|
||||
'upload_date': '20150128',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
news_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, news_id)
|
||||
video_id = self._search_regex(
|
||||
[r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'"],
|
||||
webpage, 'video id')
|
||||
return self._real_extract_video(video_id)
|
||||
|
||||
|
||||
class NHLVideocenterIE(NHLBaseInfoExtractor):
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class NTVDeIE(InfoExtractor):
|
||||
IE_NAME = 'n-tv.de'
|
||||
_VALID_URL = r'https?://(?:www\.)?n-tv\.de/mediathek/videos/[^/?#]+/[^/?#]+-article(?P<id>.+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.n-tv.de/mediathek/videos/panorama/Schnee-und-Glaette-fuehren-zu-zahlreichen-Unfaellen-und-Staus-article14438086.html',
|
||||
'md5': '6ef2514d4b1e8e03ca24b49e2f167153',
|
||||
'info_dict': {
|
||||
'id': '14438086',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'title': 'Schnee und Glätte führen zu zahlreichen Unfällen und Staus',
|
||||
'alt_title': 'Winterchaos auf deutschen Straßen',
|
||||
'description': 'Schnee und Glätte sorgen deutschlandweit für einen chaotischen Start in die Woche: Auf den Straßen kommt es zu kilometerlangen Staus und Dutzenden Glätteunfällen. In Düsseldorf und München wirbelt der Schnee zudem den Flugplan durcheinander. Dutzende Flüge landen zu spät, einige fallen ganz aus.',
|
||||
'duration': 4020,
|
||||
'timestamp': 1422892797,
|
||||
'upload_date': '20150202',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
info = self._parse_json(self._search_regex(
|
||||
r'(?s)ntv.pageInfo.article =\s(\{.*?\});', webpage, 'info'),
|
||||
video_id, transform_source=js_to_json)
|
||||
timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp'))
|
||||
vdata = self._parse_json(self._search_regex(
|
||||
r'(?s)\$\(\s*"\#player"\s*\)\s*\.data\(\s*"player",\s*(\{.*?\})\);',
|
||||
webpage, 'player data'),
|
||||
video_id, transform_source=js_to_json)
|
||||
duration = parse_duration(vdata.get('duration'))
|
||||
formats = [{
|
||||
'format_id': 'flash',
|
||||
'url': 'rtmp://fms.n-tv.de/' + vdata['video'],
|
||||
}, {
|
||||
'format_id': 'mobile',
|
||||
'url': 'http://video.n-tv.de' + vdata['videoMp4'],
|
||||
'tbr': 400, # estimation
|
||||
}]
|
||||
m3u8_url = 'http://video.n-tv.de' + vdata['videoM3u8']
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', preference=0))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['headline'],
|
||||
'description': info.get('intro'),
|
||||
'alt_title': info.get('kicker'),
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': vdata.get('html5VideoPoster'),
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
@@ -1,15 +1,14 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unescapeHTML
|
||||
)
|
||||
|
||||
|
||||
class NTVIE(InfoExtractor):
|
||||
class NTVRuIE(InfoExtractor):
|
||||
IE_NAME = 'ntv.ru'
|
||||
_VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'
|
||||
|
||||
_TESTS = [
|
||||
@@ -92,9 +91,7 @@ class NTVIE(InfoExtractor):
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id')
|
||||
@@ -6,12 +6,13 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class RingTVIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'http://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
"url": "http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30",
|
||||
"file": "857645.mp4",
|
||||
"md5": "d25945f5df41cdca2d2587165ac28720",
|
||||
"info_dict": {
|
||||
'id': '857645',
|
||||
'ext': 'mp4',
|
||||
"title": 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV',
|
||||
"description": 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.',
|
||||
}
|
||||
|
||||
@@ -10,8 +10,9 @@ class RottenTomatoesIE(VideoDetectiveIE):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
|
||||
'file': '613340.mp4',
|
||||
'info_dict': {
|
||||
'id': '613340',
|
||||
'ext': 'mp4',
|
||||
'title': 'TOY STORY 3',
|
||||
'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
|
||||
},
|
||||
|
||||
@@ -57,7 +57,7 @@ def _decrypt_url(png):
|
||||
class RTVEALaCartaIE(InfoExtractor):
|
||||
IE_NAME = 'rtve.es:alacarta'
|
||||
IE_DESC = 'RTVE a la carta'
|
||||
_VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'http://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
|
||||
@@ -74,7 +74,11 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
'id': '1694255',
|
||||
'ext': 'flv',
|
||||
'title': 'TODO',
|
||||
}
|
||||
},
|
||||
'skip': 'The f4m manifest can\'t be used yet',
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -86,6 +90,18 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
|
||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
||||
video_url = _decrypt_url(png)
|
||||
if not video_url.endswith('.f4m'):
|
||||
auth_url = video_url.replace(
|
||||
'resources/', 'auth/resources/'
|
||||
).replace('.net.rtve', '.multimedia.cdn.rtve')
|
||||
video_path = self._download_webpage(
|
||||
auth_url, video_id, 'Getting video url')
|
||||
# Use mvod.akcdn instead of flash.akamaihd.multimedia.cdn to get
|
||||
# the right Content-Length header and the mp4 format
|
||||
video_url = (
|
||||
'http://mvod.akcdn.rtve.es/{0}&v=2.6.8'
|
||||
'&fp=MAC%2016,0,0,296&r=MRUGG&g=OEOJWFXNFGCP'.format(video_path)
|
||||
)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
||||
@@ -162,10 +162,8 @@ class RUTVIE(InfoExtractor):
|
||||
'vbr': int(quality),
|
||||
}
|
||||
elif transport == 'm3u8':
|
||||
fmt = {
|
||||
'url': url,
|
||||
'ext': 'mp4',
|
||||
}
|
||||
formats.extend(self._extract_m3u8_formats(url, video_id, 'mp4'))
|
||||
continue
|
||||
else:
|
||||
fmt = {
|
||||
'url': url
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@@ -13,10 +11,15 @@ class ServingSysIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?',
|
||||
'info_dict': {
|
||||
'id': '5349193',
|
||||
'title': 'AdAPPter_Hyundai_demo',
|
||||
},
|
||||
'playlist': [{
|
||||
'file': '29955898.flv',
|
||||
'md5': 'baed851342df6846eb8677a60a011a0f',
|
||||
'info_dict': {
|
||||
'id': '29955898',
|
||||
'ext': 'flv',
|
||||
'title': 'AdAPPter_Hyundai_demo (1)',
|
||||
'duration': 74,
|
||||
'tbr': 1378,
|
||||
@@ -24,9 +27,10 @@ class ServingSysIE(InfoExtractor):
|
||||
'height': 400,
|
||||
},
|
||||
}, {
|
||||
'file': '29907998.flv',
|
||||
'md5': '979b4da2655c4bc2d81aeb915a8c5014',
|
||||
'info_dict': {
|
||||
'id': '29907998',
|
||||
'ext': 'flv',
|
||||
'title': 'AdAPPter_Hyundai_demo (2)',
|
||||
'duration': 34,
|
||||
'width': 854,
|
||||
@@ -37,14 +41,13 @@ class ServingSysIE(InfoExtractor):
|
||||
'params': {
|
||||
'playlistend': 2,
|
||||
},
|
||||
'skip': 'Blocked in the US [sic]',
|
||||
'_skip': 'Blocked in the US [sic]',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
pl_id = mobj.group('id')
|
||||
|
||||
pl_id = self._match_id(url)
|
||||
vast_doc = self._download_xml(url, pl_id)
|
||||
|
||||
title = vast_doc.find('.//AdTitle').text
|
||||
media = vast_doc.find('.//MediaFile').text
|
||||
info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL')
|
||||
|
||||
@@ -11,7 +11,7 @@ from ..compat import (
|
||||
|
||||
|
||||
class SinaIE(InfoExtractor):
|
||||
_VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/
|
||||
_VALID_URL = r'''(?x)https?://(.*?\.)?video\.sina\.com\.cn/
|
||||
(
|
||||
(.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-))))
|
||||
|
|
||||
@@ -23,9 +23,10 @@ class SinaIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
|
||||
'file': '110028898.flv',
|
||||
'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f',
|
||||
'info_dict': {
|
||||
'id': '110028898',
|
||||
'ext': 'flv',
|
||||
'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
|
||||
}
|
||||
},
|
||||
@@ -39,10 +40,6 @@ class SinaIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
|
||||
|
||||
def _extract_video(self, video_id):
|
||||
data = compat_urllib_parse.urlencode({'vid': video_id})
|
||||
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
|
||||
@@ -59,7 +56,7 @@ class SinaIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
if mobj.group('token') is not None:
|
||||
# The video id is in the redirected url
|
||||
|
||||
@@ -108,7 +108,7 @@ class SmotriIE(InfoExtractor):
|
||||
# swf player
|
||||
{
|
||||
'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
|
||||
'md5': '4d47034979d9390d14acdf59c4935bc2',
|
||||
'md5': '31099eeb4bc906712c5f40092045108d',
|
||||
'info_dict': {
|
||||
'id': 'v9188090500',
|
||||
'ext': 'mp4',
|
||||
@@ -139,9 +139,6 @@ class SmotriIE(InfoExtractor):
|
||||
def _search_meta(self, name, html, display_name=None):
|
||||
if display_name is None:
|
||||
display_name = name
|
||||
return self._html_search_regex(
|
||||
r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
|
||||
html, display_name, fatal=False)
|
||||
return self._html_search_meta(name, html, display_name)
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -246,6 +246,7 @@ class SoundcloudSetIE(SoundcloudIE):
|
||||
_TESTS = [{
|
||||
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
|
||||
'info_dict': {
|
||||
'id': '2284613',
|
||||
'title': 'The Royal Concept EP',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
@@ -279,7 +280,7 @@ class SoundcloudSetIE(SoundcloudIE):
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']],
|
||||
'id': info['id'],
|
||||
'id': '%s' % info['id'],
|
||||
'title': info['title'],
|
||||
}
|
||||
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
|
||||
|
||||
class SpikeIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(www\.spike\.com/(video-clips|episodes)/.+|
|
||||
m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+))
|
||||
(?:www\.spike\.com/(?:video-clips|(?:full-)?episodes)/.+|
|
||||
m\.spike\.com/videos/video\.rbml\?id=(?P<id>[^&]+))
|
||||
'''
|
||||
_TEST = {
|
||||
'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
|
||||
@@ -25,8 +23,7 @@ class SpikeIE(MTVServicesInfoExtractor):
|
||||
_MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.search(self._VALID_URL, url)
|
||||
mobile_id = mobj.group('mobile_id')
|
||||
if mobile_id is not None:
|
||||
mobile_id = self._match_id(url)
|
||||
if mobile_id:
|
||||
url = 'http://www.spike.com/video-clips/%s' % mobile_id
|
||||
return super(SpikeIE, self)._real_extract(url)
|
||||
|
||||
@@ -8,7 +8,7 @@ from ..utils import js_to_json
|
||||
|
||||
|
||||
class SRMediathekIE(InfoExtractor):
|
||||
IE_DESC = 'Süddeutscher Rundfunk'
|
||||
IE_DESC = 'Saarländischer Rundfunk'
|
||||
_VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
|
||||
@@ -10,17 +10,19 @@ class TeamcocoIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
|
||||
'file': '80187.mp4',
|
||||
'md5': '3f7746aa0dc86de18df7539903d399ea',
|
||||
'info_dict': {
|
||||
'id': '80187',
|
||||
'ext': 'mp4',
|
||||
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
|
||||
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
|
||||
}
|
||||
}, {
|
||||
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
|
||||
'file': '19705.mp4',
|
||||
'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
|
||||
'info_dict': {
|
||||
'id': '19705',
|
||||
'ext': 'mp4',
|
||||
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
|
||||
"title": "Louis C.K. Interview Pt. 1 11/3/11"
|
||||
}
|
||||
@@ -36,7 +38,7 @@ class TeamcocoIE(InfoExtractor):
|
||||
video_id = mobj.group("video_id")
|
||||
if not video_id:
|
||||
video_id = self._html_search_regex(
|
||||
r'data-node-id="(\d+?)"',
|
||||
r'<div\s+class="player".*?data-id="(\d+?)"',
|
||||
webpage, 'video id')
|
||||
|
||||
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
||||
|
||||
@@ -11,6 +11,7 @@ class TeleTaskIE(InfoExtractor):
|
||||
_TEST = {
|
||||
'url': 'http://www.tele-task.de/archive/video/html5/26168/',
|
||||
'info_dict': {
|
||||
'id': '26168',
|
||||
'title': 'Duplicate Detection',
|
||||
},
|
||||
'playlist': [{
|
||||
@@ -34,7 +35,6 @@ class TeleTaskIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
lecture_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, lecture_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
|
||||
@@ -16,8 +16,9 @@ class TouTvIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.tou.tv/30-vies/S04E41',
|
||||
'file': '30-vies_S04E41.mp4',
|
||||
'info_dict': {
|
||||
'id': '30-vies_S04E41',
|
||||
'ext': 'mp4',
|
||||
'title': '30 vies Saison 4 / Épisode 41',
|
||||
'description': 'md5:da363002db82ccbe4dafeb9cab039b09',
|
||||
'age_limit': 8,
|
||||
|
||||
@@ -5,27 +5,58 @@ from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_urllib_request
|
||||
)
|
||||
|
||||
|
||||
class ViddlerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
|
||||
_TEST = {
|
||||
"url": "http://www.viddler.com/v/43903784",
|
||||
_TESTS = [{
|
||||
'url': 'http://www.viddler.com/v/43903784',
|
||||
'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4',
|
||||
'info_dict': {
|
||||
'id': '43903784',
|
||||
'ext': 'mp4',
|
||||
"title": "Video Made Easy",
|
||||
'description': 'You don\'t need to be a professional to make high-quality video content. Viddler provides some quick and easy tips on how to produce great video content with limited resources. ',
|
||||
"uploader": "viddler",
|
||||
'title': 'Video Made Easy',
|
||||
'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd',
|
||||
'uploader': 'viddler',
|
||||
'timestamp': 1335371429,
|
||||
'upload_date': '20120425',
|
||||
"duration": 100.89,
|
||||
'duration': 100.89,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['video content', 'high quality video', 'video made easy', 'how to produce video with limited resources', 'viddler'],
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.viddler.com/v/4d03aad9/',
|
||||
'md5': 'faa71fbf70c0bee7ab93076fd007f4b0',
|
||||
'info_dict': {
|
||||
'id': '4d03aad9',
|
||||
'ext': 'mp4',
|
||||
'title': 'WALL-TO-GORTAT',
|
||||
'upload_date': '20150126',
|
||||
'uploader': 'deadspin',
|
||||
'timestamp': 1422285291,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.viddler.com/player/221ebbbd/0/',
|
||||
'md5': '0defa2bd0ea613d14a6e9bd1db6be326',
|
||||
'info_dict': {
|
||||
'id': '221ebbbd',
|
||||
'ext': 'mp4',
|
||||
'title': 'LETeens-Grammar-snack-third-conditional',
|
||||
'description': ' ',
|
||||
'upload_date': '20140929',
|
||||
'uploader': 'BCLETeens',
|
||||
'timestamp': 1411997190,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -33,14 +64,17 @@ class ViddlerIE(InfoExtractor):
|
||||
json_url = (
|
||||
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' %
|
||||
video_id)
|
||||
data = self._download_json(json_url, video_id)['video']
|
||||
headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
|
||||
request = compat_urllib_request.Request(json_url, None, headers)
|
||||
data = self._download_json(request, video_id)['video']
|
||||
|
||||
formats = []
|
||||
for filed in data['files']:
|
||||
if filed.get('status', 'ready') != 'ready':
|
||||
continue
|
||||
format_id = filed.get('profile_id') or filed['profile_name']
|
||||
f = {
|
||||
'format_id': filed['profile_id'],
|
||||
'format_id': format_id,
|
||||
'format_note': filed['profile_name'],
|
||||
'url': self._proto_relative_url(filed['url']),
|
||||
'width': int_or_none(filed.get('width')),
|
||||
@@ -53,16 +87,15 @@ class ViddlerIE(InfoExtractor):
|
||||
|
||||
if filed.get('cdn_url'):
|
||||
f = f.copy()
|
||||
f['url'] = self._proto_relative_url(filed['cdn_url'])
|
||||
f['format_id'] = filed['profile_id'] + '-cdn'
|
||||
f['url'] = self._proto_relative_url(filed['cdn_url'], 'http:')
|
||||
f['format_id'] = format_id + '-cdn'
|
||||
f['source_preference'] = 1
|
||||
formats.append(f)
|
||||
|
||||
if filed.get('html5_video_source'):
|
||||
f = f.copy()
|
||||
f['url'] = self._proto_relative_url(
|
||||
filed['html5_video_source'])
|
||||
f['format_id'] = filed['profile_id'] + '-html5'
|
||||
f['url'] = self._proto_relative_url(filed['html5_video_source'])
|
||||
f['format_id'] = format_id + '-html5'
|
||||
f['source_preference'] = 0
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
@@ -71,7 +104,6 @@ class ViddlerIE(InfoExtractor):
|
||||
t.get('text') for t in data.get('tags', []) if 'text' in t]
|
||||
|
||||
return {
|
||||
'_type': 'video',
|
||||
'id': video_id,
|
||||
'title': data['title'],
|
||||
'formats': formats,
|
||||
@@ -81,5 +113,6 @@ class ViddlerIE(InfoExtractor):
|
||||
'uploader': data.get('author'),
|
||||
'duration': float_or_none(data.get('length')),
|
||||
'view_count': int_or_none(data.get('view_count')),
|
||||
'comment_count': int_or_none(data.get('comment_count')),
|
||||
'categories': categories,
|
||||
}
|
||||
|
||||
@@ -501,9 +501,10 @@ class VimeoReviewIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
|
||||
'file': '75524534.mp4',
|
||||
'md5': 'c507a72f780cacc12b2248bb4006d253',
|
||||
'info_dict': {
|
||||
'id': '75524534',
|
||||
'ext': 'mp4',
|
||||
'title': "DICK HARDWICK 'Comedian'",
|
||||
'uploader': 'Richard Hardwick',
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -11,9 +12,10 @@ from ..utils import (
|
||||
|
||||
class WashingtonPostIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
|
||||
'info_dict': {
|
||||
'id': 'sinkhole-of-bureaucracy',
|
||||
'title': 'Sinkhole of bureaucracy',
|
||||
},
|
||||
'playlist': [{
|
||||
@@ -40,15 +42,38 @@ class WashingtonPostIE(InfoExtractor):
|
||||
'upload_date': '20140322',
|
||||
'uploader': 'The Washington Post',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'url': 'http://www.washingtonpost.com/blogs/wonkblog/wp/2014/12/31/one-airline-figured-out-how-to-make-sure-its-airplanes-never-disappear/',
|
||||
'info_dict': {
|
||||
'id': 'one-airline-figured-out-how-to-make-sure-its-airplanes-never-disappear',
|
||||
'title': 'One airline figured out how to make sure its airplanes never disappear',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'a7c1b5634ba5e57a6a82cdffa5b1e0d0',
|
||||
'info_dict': {
|
||||
'id': '0e4bb54c-9065-11e4-a66f-0ca5037a597d',
|
||||
'ext': 'mp4',
|
||||
'description': 'Washington Post transportation reporter Ashley Halsey III explains why a plane\'s black box needs to be recovered from a crash site instead of having its information streamed in real time throughout the flight.',
|
||||
'upload_date': '20141230',
|
||||
'uploader': 'The Washington Post',
|
||||
'timestamp': 1419974765,
|
||||
'title': 'Why black boxes don’t transmit data in real time',
|
||||
}
|
||||
}]
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
uuids = re.findall(r'data-video-uuid="([^"]+)"', webpage)
|
||||
|
||||
uuids = re.findall(r'''(?x)
|
||||
(?:
|
||||
<div\s+class="posttv-video-embed[^>]*?data-uuid=|
|
||||
data-video-uuid=
|
||||
)"([^"]+)"''', webpage)
|
||||
entries = []
|
||||
for i, uuid in enumerate(uuids, start=1):
|
||||
vinfo_all = self._download_json(
|
||||
@@ -75,10 +100,11 @@ class WashingtonPostIE(InfoExtractor):
|
||||
'filesize': s.get('fileSize'),
|
||||
'url': s.get('url'),
|
||||
'ext': 'mp4',
|
||||
'preference': -100 if s.get('type') == 'smil' else None,
|
||||
'protocol': {
|
||||
'MP4': 'http',
|
||||
'F4F': 'f4m',
|
||||
}.get(s.get('type'))
|
||||
}.get(s.get('type')),
|
||||
} for s in vinfo.get('streams', [])]
|
||||
source_media_url = vinfo.get('sourceMediaURL')
|
||||
if source_media_url:
|
||||
|
||||
@@ -71,6 +71,9 @@ class WDRIE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
|
||||
'playlist_mincount': 146,
|
||||
'info_dict': {
|
||||
'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
@@ -0,0 +1,142 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class XuiteIE(InfoExtractor):
|
||||
_REGEX_BASE64 = r'(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?'
|
||||
_VALID_URL = r'https?://vlog\.xuite\.net/(?:play|embed)/(?P<id>%s)' % _REGEX_BASE64
|
||||
_TESTS = [{
|
||||
# Audio
|
||||
'url': 'http://vlog.xuite.net/play/RGkzc1ZULTM4NjA5MTQuZmx2',
|
||||
'md5': '63a42c705772aa53fd4c1a0027f86adf',
|
||||
'info_dict': {
|
||||
'id': '3860914',
|
||||
'ext': 'mp3',
|
||||
'title': '孤單南半球-歐德陽',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 247.246,
|
||||
'timestamp': 1314932940,
|
||||
'upload_date': '20110902',
|
||||
'uploader': '阿能',
|
||||
'uploader_id': '15973816',
|
||||
'categories': ['個人短片'],
|
||||
},
|
||||
}, {
|
||||
# Video with only one format
|
||||
'url': 'http://vlog.xuite.net/play/TkRZNjhULTM0NDE2MjkuZmx2',
|
||||
'md5': 'c45737fc8ac5dc8ac2f92ecbcecf505e',
|
||||
'info_dict': {
|
||||
'id': '3441629',
|
||||
'ext': 'mp4',
|
||||
'title': '孫燕姿 - 眼淚成詩',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 217.399,
|
||||
'timestamp': 1299383640,
|
||||
'upload_date': '20110306',
|
||||
'uploader': 'Valen',
|
||||
'uploader_id': '10400126',
|
||||
'categories': ['影視娛樂'],
|
||||
},
|
||||
}, {
|
||||
# Video with two formats
|
||||
'url': 'http://vlog.xuite.net/play/bWo1N1pLLTIxMzAxMTcwLmZsdg==',
|
||||
'md5': '1166e0f461efe55b62e26a2d2a68e6de',
|
||||
'info_dict': {
|
||||
'id': '21301170',
|
||||
'ext': 'mp4',
|
||||
'title': '暗殺教室 02',
|
||||
'description': '字幕:【極影字幕社】',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 1384.907,
|
||||
'timestamp': 1421481240,
|
||||
'upload_date': '20150117',
|
||||
'uploader': '我只是想認真點',
|
||||
'uploader_id': '242127761',
|
||||
'categories': ['電玩動漫'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_flv_config(self, media_id):
|
||||
base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8')
|
||||
flv_config = self._download_xml(
|
||||
'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,
|
||||
'flv config')
|
||||
prop_dict = {}
|
||||
for prop in flv_config.findall('./property'):
|
||||
prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8')
|
||||
# CDATA may be empty in flv config
|
||||
if not prop.text:
|
||||
continue
|
||||
encoded_content = base64.b64decode(prop.text).decode('utf-8')
|
||||
prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content)
|
||||
return prop_dict
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
error_msg = self._search_regex(
|
||||
r'<div id="error-message-content">([^<]+)',
|
||||
webpage, 'error message', default=None)
|
||||
if error_msg:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error_msg),
|
||||
expected=True)
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-mediaid="(\d+)"', webpage, 'media id')
|
||||
flv_config = self._extract_flv_config(video_id)
|
||||
|
||||
FORMATS = {
|
||||
'audio': 'mp3',
|
||||
'video': 'mp4',
|
||||
}
|
||||
|
||||
formats = []
|
||||
for format_tag in ('src', 'hq_src'):
|
||||
video_url = flv_config.get(format_tag)
|
||||
if not video_url:
|
||||
continue
|
||||
format_id = self._search_regex(
|
||||
r'\bq=(.+?)\b', video_url, 'format id', default=format_tag)
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'ext': FORMATS.get(flv_config['type'], 'mp4'),
|
||||
'format_id': format_id,
|
||||
'height': int(format_id) if format_id.isnumeric() else None,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
timestamp = flv_config.get('publish_datetime')
|
||||
if timestamp:
|
||||
timestamp = parse_iso8601(timestamp + ' +0800', ' ')
|
||||
|
||||
category = flv_config.get('category')
|
||||
categories = [category] if category else []
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': flv_config['title'],
|
||||
'description': flv_config.get('description'),
|
||||
'thumbnail': flv_config.get('thumb'),
|
||||
'timestamp': timestamp,
|
||||
'uploader': flv_config.get('author_name'),
|
||||
'uploader_id': flv_config.get('author_id'),
|
||||
'duration': parse_duration(flv_config.get('duration')),
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
}
|
||||
@@ -809,6 +809,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
player_url = None
|
||||
|
||||
# Get video info
|
||||
embed_webpage = None
|
||||
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
||||
age_gate = True
|
||||
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
||||
@@ -1016,10 +1017,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
url += '&signature=' + url_data['sig'][0]
|
||||
elif 's' in url_data:
|
||||
encrypted_sig = url_data['s'][0]
|
||||
ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
|
||||
|
||||
jsplayer_url_json = self._search_regex(
|
||||
r'"assets":.+?"js":\s*("[^"]+")',
|
||||
embed_webpage if age_gate else video_webpage, 'JS player URL')
|
||||
ASSETS_RE,
|
||||
embed_webpage if age_gate else video_webpage,
|
||||
'JS player URL (1)', default=None)
|
||||
if not jsplayer_url_json and not age_gate:
|
||||
# We need the embed website after all
|
||||
if embed_webpage is None:
|
||||
embed_url = proto + '://www.youtube.com/embed/%s' % video_id
|
||||
embed_webpage = self._download_webpage(
|
||||
embed_url, video_id, 'Downloading embed webpage')
|
||||
jsplayer_url_json = self._search_regex(
|
||||
ASSETS_RE, embed_webpage, 'JS player URL')
|
||||
|
||||
player_url = json.loads(jsplayer_url_json)
|
||||
if player_url is None:
|
||||
player_url_json = self._search_regex(
|
||||
@@ -1148,6 +1160,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
|
||||
'info_dict': {
|
||||
'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
|
||||
'title': 'YDL_Empty_List',
|
||||
},
|
||||
'playlist_count': 0,
|
||||
@@ -1156,6 +1169,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
|
||||
'info_dict': {
|
||||
'title': '29C3: Not my department',
|
||||
'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
|
||||
},
|
||||
'playlist_count': 95,
|
||||
}, {
|
||||
@@ -1163,6 +1177,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
'url': 'PLBB231211A4F62143',
|
||||
'info_dict': {
|
||||
'title': '[OLD]Team Fortress 2 (Class-based LP)',
|
||||
'id': 'PLBB231211A4F62143',
|
||||
},
|
||||
'playlist_mincount': 26,
|
||||
}, {
|
||||
@@ -1170,12 +1185,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
|
||||
'info_dict': {
|
||||
'title': 'Uploads from Cauchemar',
|
||||
'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
|
||||
},
|
||||
'playlist_mincount': 799,
|
||||
}, {
|
||||
'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
|
||||
'info_dict': {
|
||||
'title': 'YDL_safe_search',
|
||||
'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
@@ -1184,6 +1201,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
'playlist_count': 4,
|
||||
'info_dict': {
|
||||
'title': 'JODA15',
|
||||
'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
|
||||
}
|
||||
}, {
|
||||
'note': 'Embedded SWF player',
|
||||
@@ -1191,12 +1209,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
'playlist_count': 4,
|
||||
'info_dict': {
|
||||
'title': 'JODA7',
|
||||
'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
|
||||
}
|
||||
}, {
|
||||
'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
|
||||
'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
|
||||
'info_dict': {
|
||||
'title': 'Uploads from Interstellar Movie',
|
||||
'title': 'Uploads from Interstellar Movie',
|
||||
'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
|
||||
},
|
||||
'playlist_mincout': 21,
|
||||
}]
|
||||
@@ -1302,6 +1322,9 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
'note': 'paginated channel',
|
||||
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||
'playlist_mincount': 91,
|
||||
'info_dict': {
|
||||
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||
}
|
||||
}]
|
||||
|
||||
def extract_videos_from_page(self, page):
|
||||
@@ -1688,6 +1711,7 @@ class YoutubeTruncatedURLIE(InfoExtractor):
|
||||
feature=[a-z_]+|
|
||||
annotation_id=annotation_[^&]+|
|
||||
x-yt-cl=[0-9]+|
|
||||
hl=[^&]*|
|
||||
)?
|
||||
|
|
||||
attribution_link\?a=[^&]+
|
||||
@@ -1707,6 +1731,9 @@ class YoutubeTruncatedURLIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/watch?feature=foo',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/watch?hl=en-GB',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
+126
-46
@@ -1,59 +1,122 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import operator
|
||||
import re
|
||||
|
||||
from .utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
_OPERATORS = [
|
||||
('|', operator.or_),
|
||||
('^', operator.xor),
|
||||
('&', operator.and_),
|
||||
('>>', operator.rshift),
|
||||
('<<', operator.lshift),
|
||||
('-', operator.sub),
|
||||
('+', operator.add),
|
||||
('%', operator.mod),
|
||||
('/', operator.truediv),
|
||||
('*', operator.mul),
|
||||
]
|
||||
_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
|
||||
_ASSIGN_OPERATORS.append(('=', lambda cur, right: right))
|
||||
|
||||
_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
|
||||
|
||||
|
||||
class JSInterpreter(object):
|
||||
def __init__(self, code):
|
||||
self.code = code
|
||||
def __init__(self, code, objects=None):
|
||||
if objects is None:
|
||||
objects = {}
|
||||
self.code = self._remove_comments(code)
|
||||
self._functions = {}
|
||||
self._objects = {}
|
||||
self._objects = objects
|
||||
|
||||
def interpret_statement(self, stmt, local_vars, allow_recursion=20):
|
||||
def _remove_comments(self, code):
|
||||
return re.sub(r'(?s)/\*.*?\*/', '', code)
|
||||
|
||||
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
||||
if allow_recursion < 0:
|
||||
raise ExtractorError('Recursion limit reached')
|
||||
|
||||
if stmt.startswith('var '):
|
||||
stmt = stmt[len('var '):]
|
||||
ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
|
||||
r'=(?P<expr>.*)$', stmt)
|
||||
if ass_m:
|
||||
if ass_m.groupdict().get('index'):
|
||||
def assign(val):
|
||||
lvar = local_vars[ass_m.group('out')]
|
||||
idx = self.interpret_expression(
|
||||
ass_m.group('index'), local_vars, allow_recursion)
|
||||
assert isinstance(idx, int)
|
||||
lvar[idx] = val
|
||||
return val
|
||||
expr = ass_m.group('expr')
|
||||
else:
|
||||
def assign(val):
|
||||
local_vars[ass_m.group('out')] = val
|
||||
return val
|
||||
expr = ass_m.group('expr')
|
||||
elif stmt.startswith('return '):
|
||||
assign = lambda v: v
|
||||
expr = stmt[len('return '):]
|
||||
should_abort = False
|
||||
stmt = stmt.lstrip()
|
||||
stmt_m = re.match(r'var\s', stmt)
|
||||
if stmt_m:
|
||||
expr = stmt[len(stmt_m.group(0)):]
|
||||
else:
|
||||
# Try interpreting it as an expression
|
||||
expr = stmt
|
||||
assign = lambda v: v
|
||||
return_m = re.match(r'return(?:\s+|$)', stmt)
|
||||
if return_m:
|
||||
expr = stmt[len(return_m.group(0)):]
|
||||
should_abort = True
|
||||
else:
|
||||
# Try interpreting it as an expression
|
||||
expr = stmt
|
||||
|
||||
v = self.interpret_expression(expr, local_vars, allow_recursion)
|
||||
return assign(v)
|
||||
return v, should_abort
|
||||
|
||||
def interpret_expression(self, expr, local_vars, allow_recursion):
|
||||
expr = expr.strip()
|
||||
|
||||
if expr == '': # Empty expression
|
||||
return None
|
||||
|
||||
if expr.startswith('('):
|
||||
parens_count = 0
|
||||
for m in re.finditer(r'[()]', expr):
|
||||
if m.group(0) == '(':
|
||||
parens_count += 1
|
||||
else:
|
||||
parens_count -= 1
|
||||
if parens_count == 0:
|
||||
sub_expr = expr[1:m.start()]
|
||||
sub_result = self.interpret_expression(
|
||||
sub_expr, local_vars, allow_recursion)
|
||||
remaining_expr = expr[m.end():].strip()
|
||||
if not remaining_expr:
|
||||
return sub_result
|
||||
else:
|
||||
expr = json.dumps(sub_result) + remaining_expr
|
||||
break
|
||||
else:
|
||||
raise ExtractorError('Premature end of parens in %r' % expr)
|
||||
|
||||
for op, opfunc in _ASSIGN_OPERATORS:
|
||||
m = re.match(r'''(?x)
|
||||
(?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
|
||||
\s*%s
|
||||
(?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
|
||||
if not m:
|
||||
continue
|
||||
right_val = self.interpret_expression(
|
||||
m.group('expr'), local_vars, allow_recursion - 1)
|
||||
|
||||
if m.groupdict().get('index'):
|
||||
lvar = local_vars[m.group('out')]
|
||||
idx = self.interpret_expression(
|
||||
m.group('index'), local_vars, allow_recursion)
|
||||
assert isinstance(idx, int)
|
||||
cur = lvar[idx]
|
||||
val = opfunc(cur, right_val)
|
||||
lvar[idx] = val
|
||||
return val
|
||||
else:
|
||||
cur = local_vars.get(m.group('out'))
|
||||
val = opfunc(cur, right_val)
|
||||
local_vars[m.group('out')] = val
|
||||
return val
|
||||
|
||||
if expr.isdigit():
|
||||
return int(expr)
|
||||
|
||||
if expr.isalpha():
|
||||
return local_vars[expr]
|
||||
var_m = re.match(
|
||||
r'(?!if|return|true|false)(?P<name>%s)$' % _NAME_RE,
|
||||
expr)
|
||||
if var_m:
|
||||
return local_vars[var_m.group('name')]
|
||||
|
||||
try:
|
||||
return json.loads(expr)
|
||||
@@ -61,7 +124,7 @@ class JSInterpreter(object):
|
||||
pass
|
||||
|
||||
m = re.match(
|
||||
r'^(?P<var>[$a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
|
||||
r'(?P<var>%s)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,
|
||||
expr)
|
||||
if m:
|
||||
variable = m.group('var')
|
||||
@@ -114,23 +177,31 @@ class JSInterpreter(object):
|
||||
return obj[member](argvals)
|
||||
|
||||
m = re.match(
|
||||
r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
|
||||
r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
|
||||
if m:
|
||||
val = local_vars[m.group('in')]
|
||||
idx = self.interpret_expression(
|
||||
m.group('idx'), local_vars, allow_recursion - 1)
|
||||
return val[idx]
|
||||
|
||||
m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
|
||||
if m:
|
||||
a = self.interpret_expression(
|
||||
m.group('a'), local_vars, allow_recursion)
|
||||
b = self.interpret_expression(
|
||||
m.group('b'), local_vars, allow_recursion)
|
||||
return a % b
|
||||
for op, opfunc in _OPERATORS:
|
||||
m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr)
|
||||
if not m:
|
||||
continue
|
||||
x, abort = self.interpret_statement(
|
||||
m.group('x'), local_vars, allow_recursion - 1)
|
||||
if abort:
|
||||
raise ExtractorError(
|
||||
'Premature left-side return of %s in %r' % (op, expr))
|
||||
y, abort = self.interpret_statement(
|
||||
m.group('y'), local_vars, allow_recursion - 1)
|
||||
if abort:
|
||||
raise ExtractorError(
|
||||
'Premature right-side return of %s in %r' % (op, expr))
|
||||
return opfunc(x, y)
|
||||
|
||||
m = re.match(
|
||||
r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
|
||||
r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]+)\)$' % _NAME_RE, expr)
|
||||
if m:
|
||||
fname = m.group('func')
|
||||
argvals = tuple([
|
||||
@@ -139,6 +210,7 @@ class JSInterpreter(object):
|
||||
if fname not in self._functions:
|
||||
self._functions[fname] = self.extract_function(fname)
|
||||
return self._functions[fname](argvals)
|
||||
|
||||
raise ExtractorError('Unsupported JS expression %r' % expr)
|
||||
|
||||
def extract_object(self, objname):
|
||||
@@ -162,9 +234,11 @@ class JSInterpreter(object):
|
||||
|
||||
def extract_function(self, funcname):
|
||||
func_m = re.search(
|
||||
(r'(?:function %s|[{;]%s\s*=\s*function)' % (
|
||||
re.escape(funcname), re.escape(funcname))) +
|
||||
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
|
||||
r'''(?x)
|
||||
(?:function\s+%s|[{;]%s\s*=\s*function)\s*
|
||||
\((?P<args>[^)]*)\)\s*
|
||||
\{(?P<code>[^}]+)\}''' % (
|
||||
re.escape(funcname), re.escape(funcname)),
|
||||
self.code)
|
||||
if func_m is None:
|
||||
raise ExtractorError('Could not find JS function %r' % funcname)
|
||||
@@ -172,10 +246,16 @@ class JSInterpreter(object):
|
||||
|
||||
return self.build_function(argnames, func_m.group('code'))
|
||||
|
||||
def call_function(self, funcname, *args):
|
||||
f = self.extract_function(funcname)
|
||||
return f(args)
|
||||
|
||||
def build_function(self, argnames, code):
|
||||
def resf(args):
|
||||
local_vars = dict(zip(argnames, args))
|
||||
for stmt in code.split(';'):
|
||||
res = self.interpret_statement(stmt, local_vars)
|
||||
res, abort = self.interpret_statement(stmt, local_vars)
|
||||
if abort:
|
||||
break
|
||||
return res
|
||||
return resf
|
||||
|
||||
@@ -297,7 +297,7 @@ def parseOpts(overrideArguments=None):
|
||||
' You can filter the video results by putting a condition in'
|
||||
' brackets, as in -f "best[height=720]"'
|
||||
' (or -f "[filesize>10M]"). '
|
||||
' This works for filesize, height, width, tbr, abr, and vbr'
|
||||
' This works for filesize, height, width, tbr, abr, vbr, and fps'
|
||||
' and the comparisons <, <=, >, >=, =, != .'
|
||||
' Formats for which the value is not known are excluded unless you'
|
||||
' put a question mark (?) after the operator.'
|
||||
@@ -698,10 +698,9 @@ def parseOpts(overrideArguments=None):
|
||||
postproc.add_option(
|
||||
'--fixup',
|
||||
metavar='POLICY', dest='fixup', default='detect_or_warn',
|
||||
help='(experimental) Automatically correct known faults of the file. '
|
||||
help='Automatically correct known faults of the file. '
|
||||
'One of never (do nothing), warn (only emit a warning), '
|
||||
'detect_or_warn(check whether we can do anything about it, warn '
|
||||
'otherwise')
|
||||
'detect_or_warn(the default; fix file if we can, warn otherwise)')
|
||||
postproc.add_option(
|
||||
'--prefer-avconv',
|
||||
action='store_false', dest='prefer_ffmpeg',
|
||||
|
||||
+17
-6
@@ -32,6 +32,7 @@ import xml.etree.ElementTree
|
||||
import zlib
|
||||
|
||||
from .compat import (
|
||||
compat_basestring,
|
||||
compat_chr,
|
||||
compat_getenv,
|
||||
compat_html_entities,
|
||||
@@ -140,7 +141,7 @@ else:
|
||||
def find_xpath_attr(node, xpath, key, val):
|
||||
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
|
||||
# .//node does not match if a node is a direct child of . !
|
||||
if isinstance(xpath, unicode):
|
||||
if isinstance(xpath, compat_str):
|
||||
xpath = xpath.encode('ascii')
|
||||
|
||||
for f in node.findall(xpath):
|
||||
@@ -654,9 +655,14 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
|
||||
self._params = params
|
||||
|
||||
def https_open(self, req):
|
||||
kwargs = {}
|
||||
if hasattr(self, '_context'): # python > 2.6
|
||||
kwargs['context'] = self._context
|
||||
if hasattr(self, '_check_hostname'): # python 3.x
|
||||
kwargs['check_hostname'] = self._check_hostname
|
||||
return self.do_open(functools.partial(
|
||||
_create_http_connection, self, self._https_conn_class, True),
|
||||
req)
|
||||
req, **kwargs)
|
||||
|
||||
|
||||
def parse_iso8601(date_str, delimiter='T'):
|
||||
@@ -1257,7 +1263,7 @@ def float_or_none(v, scale=1, invscale=1, default=None):
|
||||
|
||||
|
||||
def parse_duration(s):
|
||||
if not isinstance(s, basestring if sys.version_info < (3, 0) else compat_str):
|
||||
if not isinstance(s, compat_basestring):
|
||||
return None
|
||||
|
||||
s = s.strip()
|
||||
@@ -1269,7 +1275,10 @@ def parse_duration(s):
|
||||
(?P<only_hours>[0-9.]+)\s*(?:hours?)|
|
||||
|
||||
(?:
|
||||
(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
|
||||
(?:
|
||||
(?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
|
||||
(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
|
||||
)?
|
||||
(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
|
||||
)?
|
||||
(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
|
||||
@@ -1287,6 +1296,8 @@ def parse_duration(s):
|
||||
res += int(m.group('mins')) * 60
|
||||
if m.group('hours'):
|
||||
res += int(m.group('hours')) * 60 * 60
|
||||
if m.group('days'):
|
||||
res += int(m.group('days')) * 24 * 60 * 60
|
||||
if m.group('ms'):
|
||||
res += float(m.group('ms'))
|
||||
return res
|
||||
@@ -1421,7 +1432,7 @@ def uppercase_escape(s):
|
||||
|
||||
def escape_rfc3986(s):
|
||||
"""Escape non-ASCII characters as suggested by RFC 3986"""
|
||||
if sys.version_info < (3, 0) and isinstance(s, unicode):
|
||||
if sys.version_info < (3, 0) and isinstance(s, compat_str):
|
||||
s = s.encode('utf-8')
|
||||
return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
|
||||
|
||||
@@ -1537,7 +1548,7 @@ def js_to_json(code):
|
||||
res = re.sub(r'''(?x)
|
||||
"(?:[^"\\]*(?:\\\\|\\")?)*"|
|
||||
'(?:[^'\\]*(?:\\\\|\\')?)*'|
|
||||
[a-zA-Z_][a-zA-Z_0-9]*
|
||||
[a-zA-Z_][.a-zA-Z_0-9]*
|
||||
''', fix_kv, code)
|
||||
res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
|
||||
return res
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2015.01.25'
|
||||
__version__ = '2015.02.02.2'
|
||||
|
||||
Reference in New Issue
Block a user