1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2026-06-11 15:10:15 +00:00

Compare commits

..

70 Commits

Author SHA1 Message Date
Philipp Hagemeister f1f25be6db release 2014.04.30 2014-04-30 02:05:03 +02:00
Philipp Hagemeister deab8c1960 Merge branch 'master' of github.com:rg3/youtube-dl 2014-04-30 02:04:55 +02:00
Philipp Hagemeister c57f775710 [YoutubeDL] Add simple tests for format_note (Closes #2825) 2014-04-30 02:02:41 +02:00
AGSPhoenix e75cafe9fb Clean up format list for consistency
This should make the format list output look a bit nicer.
2014-04-30 01:52:05 +02:00
Philipp Hagemeister 33ab8453c4 Merge pull request #2813 from dstftw/test-real-download-improvement
Improve download mechanism when Range HTTP header is ignored
2014-04-30 01:50:33 +02:00
Philipp Hagemeister ebd3c7b370 [generic] Add support for protocol-independent URLs (Fixes #2810) 2014-04-30 01:46:06 +02:00
Philipp Hagemeister 29645a1d44 Merge remote-tracking branch 'pulpe/moviezinese' 2014-04-30 01:37:05 +02:00
Philipp Hagemeister 22d99a801a [syfy] Add suppor for generic URLs (Fixes #2827) 2014-04-30 01:35:52 +02:00
Jaime Marquínez Ferrándiz 57b8d84cd9 [5min] Raise an error if the 'success' field is False
For example for georestricted videos.
2014-04-29 14:57:38 +02:00
Sergey M․ 65e4ad5bfe [rtbf] Minor changes and YouTube videos support 2014-04-29 19:41:58 +07:00
Nicolas Évrard 98b7d476d9 [RTBFVideo] Remove useless print statement 2014-04-28 23:19:56 +02:00
Nicolas Évrard 201e3c99b9 [RTBFVideo] Add new extractor 2014-04-28 20:32:13 +02:00
Sergey M․ 8a7a4a9796 [scivee] Skip test for now 2014-04-28 19:52:32 +07:00
Sergey M․ df297c8794 [http] Improve download mechanism when Range HTTP header is ignored 2014-04-27 09:32:01 +07:00
pulpe 3f53a75f02 [moviezine] Add extractor for moviezine.se (fixes #2808) 2014-04-26 18:55:29 +02:00
Sergey M․ 7c360e3a04 [scivee] Add support for scivee.tv 2014-04-26 20:22:15 +07:00
Sergey M․ d2176c8011 [nrk] Add support for nrk.no (Closes #2804) 2014-04-25 21:34:44 +07:00
Jaime Marquínez Ferrándiz aa92f06308 [youtube] Don't call 'unquote_plus' on the video title (fixes #2799)
It's already unquoted after calling 'compat_parse_qs'.
It replaced '+' with spaces, for example in https://www.youtube.com/watch?v=XC0b5YexO-I.
2014-04-25 13:19:03 +02:00
Jaime Marquínez Ferrándiz e00c9cf599 [youtube] Update test description field 2014-04-25 13:14:15 +02:00
Jaime Marquínez Ferrándiz ba60a3ebe0 [youtube] Update test description field 2014-04-25 12:57:04 +02:00
Jaime Marquínez Ferrándiz efb7e11988 [vimeo] Add an extractor for the watch later list (closes #2787) 2014-04-24 21:51:20 +02:00
Sergey M․ a55c8b7aac [9gag] Fix post view regex 2014-04-24 19:52:34 +07:00
Jaime Marquínez Ferrándiz a980bc4324 [vimeo] Fix logging in python 3.x
The POST data must be a bytes object.
2014-04-24 14:44:27 +02:00
Sergey M․ 4b10aadffc [dailymotion] Fix user playlist extraction 2014-04-23 19:42:34 +07:00
Sergey M․ 5bec574859 [ted] Update test 2014-04-22 19:49:41 +07:00
Philipp Hagemeister d11271dd29 [youtube] Include video Id in common error message (Fixes #2786) 2014-04-21 20:34:03 +02:00
Philipp Hagemeister 1d9d26d09b release 2014.04.21.6 2014-04-21 16:18:32 +02:00
Philipp Hagemeister c0292e8ab7 [generic] Improve jwplayer detection (Fixes #2731) 2014-04-21 16:16:53 +02:00
Philipp Hagemeister f44e5d8b43 [vuclip] Fix VALID_URL regex 2014-04-21 16:14:21 +02:00
Philipp Hagemeister 6ea74538e3 release 2014.04.21.5 2014-04-21 15:56:23 +02:00
Philipp Hagemeister 24b8924b46 [facebook] Correct login (Fixes #2743) 2014-04-21 15:56:09 +02:00
Philipp Hagemeister 86a3c67112 release 2014.04.21.4 2014-04-21 15:25:16 +02:00
Philipp Hagemeister 8be874370d Merge branch 'master' of github.com:rg3/youtube-dl 2014-04-21 15:24:51 +02:00
Philipp Hagemeister aec74dd95a [vuclip] Add extractor (Fixes #2735) 2014-04-21 15:24:44 +02:00
Sergey M․ 6890574256 [rutube] Add missing whitespace 2014-04-21 19:04:11 +07:00
Sergey M․ d03745c684 [jukebox] Update test md5 2014-04-21 19:00:27 +07:00
Philipp Hagemeister 28746fbd59 [bilibili] Add preliminary support (#2174)
The URL http://www.bilibili.tv/video/av636603/index_2.html does not work yet.
2014-04-21 13:46:41 +02:00
Philipp Hagemeister 0321213c11 [test_subtitles] Allow more subtitles for TED videos 2014-04-21 13:20:14 +02:00
Philipp Hagemeister 3f0aae4244 release 2014.04.21.3 2014-04-21 12:40:09 +02:00
Philipp Hagemeister 48099643cc [generic] Be more relaxed when looking for aparat embeds (Fixes #2784) 2014-04-21 12:37:41 +02:00
Philipp Hagemeister 621f33c9d0 [ted] Extend search for description 2014-04-21 12:37:16 +02:00
Philipp Hagemeister f07a9f6f43 [ted] Remove superfluous u prefixes 2014-04-21 12:34:32 +02:00
Philipp Hagemeister e51880fd32 [cnet] Correct JSON capturing 2014-04-21 07:59:29 +02:00
Philipp Hagemeister 88ce273da4 [arte] differentiate JSON outputs 2014-04-21 07:59:16 +02:00
Philipp Hagemeister b9ba5dfa28 [test helper] Correct only_matching test gathering 2014-04-21 07:56:51 +02:00
Philipp Hagemeister 4086f11929 release 2014.04.21.2 2014-04-21 07:12:12 +02:00
Philipp Hagemeister 478c2c6193 [clubic] Add extractor (Fixes #2773) 2014-04-21 07:12:02 +02:00
Philipp Hagemeister d2d6481afb [mdr] Remove unused imports 2014-04-21 06:49:21 +02:00
Philipp Hagemeister 43acb120f3 release 2014.04.21.1 2014-04-21 06:28:25 +02:00
Philipp Hagemeister e8f2025edf [mdr] Add support for modern URLs (Fixes #2775) 2014-04-21 06:25:21 +02:00
Philipp Hagemeister a4eb9578af [yahoo] Add support for movies (Fixes #2780) 2014-04-21 06:18:04 +02:00
Philipp Hagemeister fa35cdad02 [condenast|generic] Add support for condenast embeds (Fixes #2783) 2014-04-21 05:47:52 +02:00
Philipp Hagemeister d1b9c912a4 [utils] Fix _windows_write_string (Fixes #2779)
It turns out that the function did not work for outputs longer than 1024 UCS-2 tokens.
Write non-BMP characters one by one to ensure that we count correctly.
2014-04-21 04:59:46 +02:00
Philipp Hagemeister edec83a025 [infoq] Add support for HTTP downloads (Fixes #722) 2014-04-21 03:21:34 +02:00
Philipp Hagemeister c0a7c60815 [infoq] Simplify (#2777) 2014-04-21 02:55:35 +02:00
Philipp Hagemeister 117a7d1944 Merge remote-tracking branch 'kwbr/master' 2014-04-21 02:48:04 +02:00
Philipp Hagemeister a40e0dd434 release 2014.04.21 2014-04-21 02:34:53 +02:00
Philipp Hagemeister 188b086dd9 Merge branch 'master' of github.com:rg3/youtube-dl 2014-04-21 02:34:44 +02:00
Philipp Hagemeister 1f27d2c0e1 [steam] Add support for steamcommunity.com (Fixes #2757) 2014-04-21 02:34:34 +02:00
Kai Weber 7560096db5 [infoq] Simplify playpath calculation 2014-04-20 01:10:30 +02:00
Kai Weber 282cb9c7ba [infoq] Fix extractor 2014-04-20 01:01:37 +02:00
Sergey M․ 3a9d6790ad [ivi] Update playlist tests 2014-04-20 03:06:50 +07:00
Philipp Hagemeister 0610a3e0b2 Remove unused imports 2014-04-19 19:57:09 +02:00
Philipp Hagemeister 7f9c31df88 [steam] Simplify 2014-04-19 19:55:53 +02:00
Philipp Hagemeister 3fa6b6e293 [steam] Modernize 2014-04-19 19:51:04 +02:00
Philipp Hagemeister 3c50b99ab4 [extremetube] Modernize 2014-04-19 19:42:51 +02:00
Philipp Hagemeister 52fadd5fb2 [test_all_urls] Add support for distributed URL matching test definition 2014-04-19 19:41:06 +02:00
Philipp Hagemeister 5367fe7f4d [test_all_urls] Simplify 2014-04-19 13:01:15 +02:00
Philipp Hagemeister 427588f6e7 Merge remote-tracking branch 'MikeCol/extremetube-gay' 2014-04-19 12:59:52 +02:00
MikeCol 4145a257be Extended regex match to include gay clips 2014-04-19 00:29:42 +02:00
40 changed files with 1015 additions and 297 deletions
+24 -4
View File
@@ -74,13 +74,19 @@ class FakeYDL(YoutubeDL):
old_report_warning(message)
self.report_warning = types.MethodType(report_warning, self)
def gettestcases():
def gettestcases(include_onlymatching=False):
for ie in youtube_dl.extractor.gen_extractors():
t = getattr(ie, '_TEST', None)
if t:
t['name'] = type(ie).__name__[:-len('IE')]
yield t
for t in getattr(ie, '_TESTS', []):
assert not hasattr(ie, '_TESTS'), \
'%s has _TEST and _TESTS' % type(ie).__name__
tests = [t]
else:
tests = getattr(ie, '_TESTS', [])
for t in tests:
if not include_onlymatching and t.get('only_matching', False):
continue
t['name'] = type(ie).__name__[:-len('IE')]
yield t
@@ -128,3 +134,17 @@ def expect_info_dict(self, expected_dict, got_dict):
missing_keys,
'Missing keys in test definition: %s' % (
', '.join(sorted(missing_keys))))
def assertRegexpMatches(self, text, regexp, msg=None):
if hasattr(self, 'assertRegexpMatches'):
return self.assertRegexpMatches(text, regexp, msg)
else:
m = re.match(regexp, text)
if not m:
note = 'Regexp didn\'t match: %r not found in %r' % (regexp, text)
if msg is None:
msg = note
else:
msg = note + ', ' + msg
self.assertTrue(m, msg)
+7 -1
View File
@@ -8,7 +8,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL
from test.helper import FakeYDL, assertRegexpMatches
from youtube_dl import YoutubeDL
from youtube_dl.extractor import YoutubeIE
@@ -274,6 +274,12 @@ class TestFormatSelection(unittest.TestCase):
# Replace missing fields with 'NA'
self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
def test_format_note(self):
ydl = YoutubeDL()
self.assertEqual(ydl._format_note({}), '')
assertRegexpMatches(self, ydl._format_note({
'vbr': 10,
}), '^x\s*10k$')
if __name__ == '__main__':
unittest.main()
+12 -11
View File
@@ -77,20 +77,20 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
def test_justin_tv_channelid_matching(self):
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
self.assertTrue(JustinTVIE.suitable(u"www.justin.tv/vanillatv"))
self.assertTrue(JustinTVIE.suitable(u"www.twitch.tv/vanillatv"))
self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv"))
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv"))
self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv/"))
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/"))
self.assertTrue(JustinTVIE.suitable('justin.tv/vanillatv'))
self.assertTrue(JustinTVIE.suitable('twitch.tv/vanillatv'))
self.assertTrue(JustinTVIE.suitable('www.justin.tv/vanillatv'))
self.assertTrue(JustinTVIE.suitable('www.twitch.tv/vanillatv'))
self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv'))
self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv'))
self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv/'))
self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/'))
def test_justintv_videoid_matching(self):
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/b/328087483"))
self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/b/328087483'))
def test_justin_tv_chapterid_matching(self):
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361'))
def test_youtube_extract(self):
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
@@ -106,7 +106,7 @@ class TestAllURLsMatching(unittest.TestCase):
def test_no_duplicates(self):
ies = gen_extractors()
for tc in gettestcases():
for tc in gettestcases(include_onlymatching=True):
url = tc['url']
for ie in ies:
if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
@@ -176,5 +176,6 @@ class TestAllURLsMatching(unittest.TestCase):
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
['Yahoo'])
if __name__ == '__main__':
unittest.main()
+3 -3
View File
@@ -192,8 +192,8 @@ class TestPlaylists(unittest.TestCase):
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'dezhurnyi_angel')
self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)')
self.assertTrue(len(result['entries']) >= 36)
self.assertTrue(len(result['entries']) >= 23)
def test_ivi_compilation_season(self):
dl = FakeYDL()
ie = IviCompilationIE(dl)
@@ -201,7 +201,7 @@ class TestPlaylists(unittest.TestCase):
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'dezhurnyi_angel/season2')
self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 2 сезон')
self.assertTrue(len(result['entries']) >= 20)
self.assertTrue(len(result['entries']) >= 7)
def test_imdb_list(self):
dl = FakeYDL()
+1 -1
View File
@@ -181,7 +181,7 @@ class TestTedSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 28)
self.assertTrue(len(subtitles.keys()) >= 28)
def test_list_subtitles(self):
self.DL.expect_warning(u'Automatic Captions not supported by this server')
+46 -46
View File
@@ -1139,57 +1139,57 @@ class YoutubeDL(object):
res = default
return res
def list_formats(self, info_dict):
def format_note(fdict):
res = ''
if fdict.get('ext') in ['f4f', 'f4m']:
res += '(unsupported) '
if fdict.get('format_note') is not None:
res += fdict['format_note'] + ' '
if fdict.get('tbr') is not None:
res += '%4dk ' % fdict['tbr']
if fdict.get('container') is not None:
if res:
res += ', '
res += '%s container' % fdict['container']
if (fdict.get('vcodec') is not None and
fdict.get('vcodec') != 'none'):
if res:
res += ', '
res += fdict['vcodec']
if fdict.get('vbr') is not None:
res += '@'
elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
res += 'video@'
def _format_note(self, fdict):
res = ''
if fdict.get('ext') in ['f4f', 'f4m']:
res += '(unsupported) '
if fdict.get('format_note') is not None:
res += fdict['format_note'] + ' '
if fdict.get('tbr') is not None:
res += '%4dk ' % fdict['tbr']
if fdict.get('container') is not None:
if res:
res += ', '
res += '%s container' % fdict['container']
if (fdict.get('vcodec') is not None and
fdict.get('vcodec') != 'none'):
if res:
res += ', '
res += fdict['vcodec']
if fdict.get('vbr') is not None:
res += '%4dk' % fdict['vbr']
if fdict.get('acodec') is not None:
if res:
res += ', '
if fdict['acodec'] == 'none':
res += 'video only'
else:
res += '%-5s' % fdict['acodec']
elif fdict.get('abr') is not None:
if res:
res += ', '
res += 'audio'
if fdict.get('abr') is not None:
res += '@%3dk' % fdict['abr']
if fdict.get('asr') is not None:
res += ' (%5dHz)' % fdict['asr']
if fdict.get('filesize') is not None:
if res:
res += ', '
res += format_bytes(fdict['filesize'])
return res
res += '@'
elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
res += 'video@'
if fdict.get('vbr') is not None:
res += '%4dk' % fdict['vbr']
if fdict.get('acodec') is not None:
if res:
res += ', '
if fdict['acodec'] == 'none':
res += 'video only'
else:
res += '%-5s' % fdict['acodec']
elif fdict.get('abr') is not None:
if res:
res += ', '
res += 'audio'
if fdict.get('abr') is not None:
res += '@%3dk' % fdict['abr']
if fdict.get('asr') is not None:
res += ' (%5dHz)' % fdict['asr']
if fdict.get('filesize') is not None:
if res:
res += ', '
res += format_bytes(fdict['filesize'])
return res
def list_formats(self, info_dict):
def line(format, idlen=20):
return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
format['format_id'],
format['ext'],
self.format_resolution(format),
format_note(format),
self._format_note(format),
))
formats = info_dict.get('formats', [info_dict])
@@ -1197,8 +1197,8 @@ class YoutubeDL(object):
max(len(f['format_id']) for f in formats))
formats_s = [line(f, idlen) for f in formats]
if len(formats) > 1:
formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
header_line = line({
'format_id': 'format code', 'ext': 'extension',
+19 -3
View File
@@ -14,6 +14,8 @@ from ..utils import (
class HttpFD(FileDownloader):
_TEST_FILE_SIZE = 10241
def real_download(self, filename, info_dict):
url = info_dict['url']
tmpfilename = self.temp_name(filename)
@@ -28,8 +30,10 @@ class HttpFD(FileDownloader):
basic_request = compat_urllib_request.Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers)
if self.params.get('test', False):
request.add_header('Range', 'bytes=0-10240')
is_test = self.params.get('test', False)
if is_test:
request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
# Establish possible resume length
if os.path.isfile(encodeFilename(tmpfilename)):
@@ -100,6 +104,15 @@ class HttpFD(FileDownloader):
return False
data_len = data.info().get('Content-length', None)
# Range HTTP header may be ignored/unsupported by a webserver
# (e.g. extractor/scivee.py, extractor/bambuser.py).
# However, for a test we still would like to download just a piece of a file.
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
# block size when downloading a file.
if is_test and data_len > self._TEST_FILE_SIZE:
data_len = self._TEST_FILE_SIZE
if data_len is not None:
data_len = int(data_len) + resume_len
min_data_len = self.params.get("min_filesize", None)
@@ -118,7 +131,7 @@ class HttpFD(FileDownloader):
while True:
# Download and write
before = time.time()
data_block = data.read(block_size)
data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
after = time.time()
if len(data_block) == 0:
break
@@ -162,6 +175,9 @@ class HttpFD(FileDownloader):
'speed': speed,
})
if is_test and byte_counter == data_len:
break
# Apply rate limit
self.slow_down(start, byte_counter - resume_len)
+8
View File
@@ -20,6 +20,7 @@ from .auengine import AUEngineIE
from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE
from .bbccouk import BBCCoUkIE
from .bilibili import BiliBiliIE
from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
@@ -40,6 +41,7 @@ from .cinemassacre import CinemassacreIE
from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE
from .clipsyndicate import ClipsyndicateIE
from .clubic import ClubicIE
from .cmt import CMTIE
from .cnet import CNETIE
from .cnn import (
@@ -158,6 +160,7 @@ from .mofosex import MofosexIE
from .mooshare import MooshareIE
from .morningstar import MorningstarIE
from .motorsport import MotorsportIE
from .moviezine import MoviezineIE
from .movshare import MovShareIE
from .mtv import (
MTVIE,
@@ -186,6 +189,7 @@ from .normalboots import NormalbootsIE
from .novamov import NovaMovIE
from .nowness import NownessIE
from .nowvideo import NowVideoIE
from .nrk import NRKIE
from .ntv import NTVIE
from .oe1 import OE1IE
from .ooyala import OoyalaIE
@@ -207,6 +211,7 @@ from .ringtv import RingTVIE
from .ro220 import Ro220IE
from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE
from .rtbf import RTBFIE
from .rtlnow import RTLnowIE
from .rts import RTSIE
from .rtve import RTVEALaCartaIE
@@ -218,6 +223,7 @@ from .rutube import (
)
from .rutv import RUTVIE
from .savefrom import SaveFromIE
from .scivee import SciVeeIE
from .servingsys import ServingSysIE
from .sina import SinaIE
from .slideshare import SlideshareIE
@@ -290,11 +296,13 @@ from .vimeo import (
VimeoAlbumIE,
VimeoGroupsIE,
VimeoReviewIE,
VimeoWatchLaterIE,
)
from .vine import VineIE
from .viki import VikiIE
from .vk import VKIE
from .vube import VubeIE
from .vuclip import VuClipIE
from .washingtonpost import WashingtonPostIE
from .wat import WatIE
from .wdr import (
+2 -1
View File
@@ -74,7 +74,8 @@ class ArteTVPlus7IE(InfoExtractor):
return self._extract_from_webpage(webpage, video_id, lang)
def _extract_from_webpage(self, webpage, video_id, lang):
json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
json_url = self._html_search_regex(
r'arte_vp_url="(.*?)"', webpage, 'json vp url')
return self._extract_from_json_url(json_url, video_id, lang)
def _extract_from_json_url(self, json_url, video_id, lang):
+106
View File
@@ -0,0 +1,106 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
compat_parse_qs,
ExtractorError,
int_or_none,
unified_strdate,
)
class BiliBiliIE(InfoExtractor):
_VALID_URL = r'http://www\.bilibili\.tv/video/av(?P<id>[0-9]+)/'
_TEST = {
'url': 'http://www.bilibili.tv/video/av1074402/',
'md5': '2c301e4dab317596e837c3e7633e7d86',
'info_dict': {
'id': '1074402',
'ext': 'flv',
'title': '【金坷垃】金泡沫',
'duration': 308,
'upload_date': '20140420',
'thumbnail': 're:^https?://.+\.jpg',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
video_code = self._search_regex(
r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
title = self._html_search_meta(
'media:title', video_code, 'title', fatal=True)
duration_str = self._html_search_meta(
'duration', video_code, 'duration')
if duration_str is None:
duration = None
else:
duration_mobj = re.match(
r'^T(?:(?P<hours>[0-9]+)H)?(?P<minutes>[0-9]+)M(?P<seconds>[0-9]+)S$',
duration_str)
duration = (
int_or_none(duration_mobj.group('hours'), default=0) * 3600 +
int(duration_mobj.group('minutes')) * 60 +
int(duration_mobj.group('seconds')))
upload_date = unified_strdate(self._html_search_meta(
'uploadDate', video_code, fatal=False))
thumbnail = self._html_search_meta(
'thumbnailUrl', video_code, 'thumbnail', fatal=False)
player_params = compat_parse_qs(self._html_search_regex(
r'<iframe .*?class="player" src="https://secure.bilibili.tv/secure,([^"]+)"',
webpage, 'player params'))
if 'cid' in player_params:
cid = player_params['cid'][0]
lq_doc = self._download_xml(
'http://interface.bilibili.cn/v_cdn_play?cid=%s' % cid,
video_id,
note='Downloading LQ video info'
)
lq_durl = lq_doc.find('.//durl')
formats = [{
'format_id': 'lq',
'quality': 1,
'url': lq_durl.find('./url').text,
'filesize': int_or_none(
lq_durl.find('./size'), get_attr='text'),
}]
hq_doc = self._download_xml(
'http://interface.bilibili.cn/playurl?cid=%s' % cid,
video_id,
note='Downloading HQ video info',
fatal=False,
)
if hq_doc is not False:
hq_durl = hq_doc.find('.//durl')
formats.append({
'format_id': 'hq',
'quality': 2,
'ext': 'flv',
'url': hq_durl.find('./url').text,
'filesize': int_or_none(
hq_durl.find('./size'), get_attr='text'),
})
else:
raise ExtractorError('Unsupported player parameters: %r' % (player_params,))
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'formats': formats,
'duration': duration,
'upload_date': upload_date,
'thumbnail': thumbnail,
}
+58
View File
@@ -0,0 +1,58 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
qualities,
)
class ClubicIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?clubic\.com/video/[^/]+/video.*-(?P<id>[0-9]+)\.html'
_TEST = {
'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html',
'md5': '1592b694ba586036efac1776b0b43cd3',
'info_dict': {
'id': '448474',
'ext': 'mp4',
'title': 'Clubic Week 2.0 : le FBI se lance dans la photo d\u0092identité',
'description': 're:Gueule de bois chez Nokia. Le constructeur a indiqué cette.*',
'thumbnail': 're:^http://img\.clubic\.com/.*\.jpg$',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
player_page = self._download_webpage(player_url, video_id)
config_json = self._search_regex(
r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
'configuration')
config = json.loads(config_json)
video_info = config['videoInfo']
sources = config['sources']
quality_order = qualities(['sd', 'hq'])
formats = [{
'format_id': src['streamQuality'],
'url': src['src'],
'quality': quality_order(src['streamQuality']),
} for src in sources]
self._sort_formats(formats)
return {
'id': video_id,
'title': video_info['title'],
'formats': formats,
'description': clean_html(video_info.get('description')),
'thumbnail': config.get('poster'),
}
+1 -1
View File
@@ -33,7 +33,7 @@ class CNETIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
data_json = self._html_search_regex(
r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'",
r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'",
webpage, 'data json')
data = json.loads(data_json)
vdata = data['video']
+5 -2
View File
@@ -279,9 +279,12 @@ class InfoExtractor(object):
def _download_xml(self, url_or_request, video_id,
note=u'Downloading XML', errnote=u'Unable to download XML',
transform_source=None):
transform_source=None, fatal=True):
"""Return the xml as an xml.etree.ElementTree.Element"""
xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
xml_string = self._download_webpage(
url_or_request, video_id, note, errnote, fatal=fatal)
if xml_string is False:
return xml_string
if transform_source:
xml_string = transform_source(xml_string)
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
+18 -12
View File
@@ -28,16 +28,18 @@ class CondeNastIE(InfoExtractor):
'glamour': 'Glamour',
'wmagazine': 'W Magazine',
'vanityfair': 'Vanity Fair',
'cnevids': 'Condé Nast',
}
_VALID_URL = r'http://(video|www)\.(?P<site>%s)\.com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys())
_VALID_URL = r'http://(video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
_TEST = {
'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
'file': '5171b343c2b4c00dd0c1ccb3.mp4',
'md5': '1921f713ed48aabd715691f774c451f7',
'info_dict': {
'id': '5171b343c2b4c00dd0c1ccb3',
'ext': 'mp4',
'title': '3D Printed Speakers Lit With LED',
'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
}
@@ -55,12 +57,16 @@ class CondeNastIE(InfoExtractor):
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
return self.playlist_result(entries, playlist_title=title)
def _extract_video(self, webpage):
description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>',
r'<div class="video-post-content">(.+?)</div>',
],
webpage, 'description',
fatal=False, flags=re.DOTALL)
def _extract_video(self, webpage, url_type):
if url_type != 'embed':
description = self._html_search_regex(
[
r'<div class="cne-video-description">(.+?)</div>',
r'<div class="video-post-content">(.+?)</div>',
],
webpage, 'description', fatal=False, flags=re.DOTALL)
else:
description = None
params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
'player params', flags=re.DOTALL)
video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id')
@@ -99,12 +105,12 @@ class CondeNastIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
site = mobj.group('site')
url_type = mobj.group('type')
id = mobj.group('id')
item_id = mobj.group('id')
self.to_screen(u'Extracting from %s with the Condé Nast extractor' % self._SITES[site])
webpage = self._download_webpage(url, id)
self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site])
webpage = self._download_webpage(url, item_id)
if url_type == 'series':
return self._extract_series(url, webpage)
else:
return self._extract_video(webpage)
return self._extract_video(webpage, url_type)
+5 -6
View File
@@ -8,12 +8,11 @@ from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_urllib_request,
compat_str,
get_element_by_id,
orderedSet,
str_to_int,
int_or_none,
ExtractorError,
unescapeHTML,
)
class DailymotionBaseInfoExtractor(InfoExtractor):
@@ -189,7 +188,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
webpage = self._download_webpage(request,
id, u'Downloading page %s' % pagenum)
video_ids.extend(re.findall(r'data-id="(.+?)"', webpage))
video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
break
@@ -218,9 +217,9 @@ class DailymotionUserIE(DailymotionPlaylistIE):
mobj = re.match(self._VALID_URL, url)
user = mobj.group('user')
webpage = self._download_webpage(url, user)
full_user = self._html_search_regex(
r'<a class="label" href="/%s".*?>(.*?)</' % re.escape(user),
webpage, u'user', flags=re.DOTALL)
full_user = unescapeHTML(self._html_search_regex(
r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
webpage, u'user', flags=re.DOTALL))
return {
'_type': 'playlist',
+24 -16
View File
@@ -1,4 +1,5 @@
import os
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -8,18 +9,23 @@ from ..utils import (
compat_urllib_parse,
)
class ExtremeTubeIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
_TEST = {
u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
u'file': u'652431.mp4',
u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0',
u'info_dict': {
u"title": u"Music Video 14 british euro brit european cumshots swallow",
u"uploader": u"unknown",
u"age_limit": 18,
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
'info_dict': {
'id': '652431',
'ext': 'mp4',
'title': 'Music Video 14 british euro brit european cumshots swallow',
'uploader': 'unknown',
'age_limit': 18,
}
}
}, {
'url': 'http://www.extremetube.com/gay/video/abcde-1234',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -30,11 +36,14 @@ class ExtremeTubeIE(InfoExtractor):
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, u'title')
uploader = self._html_search_regex(r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False)
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, u'video_url'))
video_title = self._html_search_regex(
r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, 'title')
uploader = self._html_search_regex(
r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader',
fatal=False)
video_url = compat_urllib_parse.unquote(self._html_search_regex(
r'video_url=(.+?)&amp;', webpage, 'video_url'))
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[5].split('_')[:2]
format = "-".join(format)
@@ -43,7 +52,6 @@ class ExtremeTubeIE(InfoExtractor):
'title': video_title,
'uploader': uploader,
'url': video_url,
'ext': extension,
'format': format,
'format_id': format,
'age_limit': 18,
+1 -2
View File
@@ -76,9 +76,8 @@ class FacebookIE(InfoExtractor):
check_form = {
'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
'h': self._search_regex(r'name="h" value="(\w*?)"', login_results, 'h'),
'name_action_selected': 'dont_save',
'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'),
}
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
+11 -2
View File
@@ -6,6 +6,7 @@ from .common import InfoExtractor
from ..utils import (
compat_str,
compat_urllib_parse,
ExtractorError,
)
@@ -58,9 +59,17 @@ class FiveMinIE(InfoExtractor):
'isPlayerSeed': 'true',
'url': embed_url,
})
info = self._download_json(
response = self._download_json(
'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
video_id)['binding'][0]
video_id)
if not response['success']:
err_msg = response['errorMessage']
if err_msg == 'ErrorVideoUserNotGeo':
msg = 'Video not available from your location'
else:
msg = 'Aol said: %s' % err_msg
raise ExtractorError(msg, expected=True, video_id=video_id)
info = response['binding'][0]
second_id = compat_str(int(video_id[:-2]) + 1)
formats = []
+56 -3
View File
@@ -239,6 +239,28 @@ class GenericIE(InfoExtractor):
'uploader_id': 'rbctv_2012_4',
},
},
# Condé Nast embed
{
'url': 'http://www.wired.com/2014/04/honda-asimo/',
'md5': 'ba0dfe966fa007657bd1443ee672db0f',
'info_dict': {
'id': '53501be369702d3275860000',
'ext': 'mp4',
'title': 'Hondas New Asimo Robot Is More Human Than Ever',
}
},
# Dailymotion embed
{
'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
'md5': '441aeeb82eb72c422c7f14ec533999cd',
'info_dict': {
'id': 'k2mm4bCdJ6CQ2i7c8o2',
'ext': 'mp4',
'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
'uploader': 'Spi0n',
},
'add_ie': ['Dailymotion'],
}
]
def report_download_webpage(self, video_id):
@@ -323,6 +345,15 @@ class GenericIE(InfoExtractor):
}
def _real_extract(self, url):
if url.startswith('//'):
return {
'_type': 'url',
'url': (
'http:'
if self._downloader.params.get('prefer_insecure', False)
else 'https:') + url,
}
parsed_url = compat_urlparse.urlparse(url)
if not parsed_url.scheme:
default_search = self._downloader.params.get('default_search')
@@ -459,7 +490,7 @@ class GenericIE(InfoExtractor):
matches = re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
if matches:
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion')
urlrs = [self.url_result(unescapeHTML(tuppl[1]))
for tuppl in matches]
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
@@ -485,6 +516,22 @@ class GenericIE(InfoExtractor):
if mobj:
return self.url_result(mobj.group(1), 'BlipTV')
# Look for embedded condenast player
matches = re.findall(
r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
webpage)
if matches:
return {
'_type': 'playlist',
'entries': [{
'_type': 'url',
'ie_key': 'CondeNast',
'url': ma,
} for ma in matches],
'title': video_title,
'id': video_id,
}
# Look for Bandcamp pages with custom domain
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
if mobj is not None:
@@ -505,7 +552,7 @@ class GenericIE(InfoExtractor):
return OoyalaIE._build_url_result(mobj.group('ec'))
# Look for Aparat videos
mobj = re.search(r'<iframe src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
if mobj is not None:
return self.url_result(mobj.group(1), 'Aparat')
@@ -593,7 +640,13 @@ class GenericIE(InfoExtractor):
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:
# Look for gorilla-vid style embedding
mobj = re.search(r'(?s)(?:jw_plugins|JWPlayerOptions).*?file\s*:\s*["\'](.*?)["\']', webpage)
mobj = re.search(r'''(?sx)
(?:
jw_plugins|
JWPlayerOptions|
jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
)
.*?file\s*:\s*["\'](.*?)["\']''', webpage)
if mobj is None:
# Broaden the search a little bit
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
+2 -5
View File
@@ -106,7 +106,7 @@ class OneUPIE(IGNIE):
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
_TEST = {
_TESTS = [{
'url': 'http://gamevideos.1up.com/video/id/34976',
'md5': '68a54ce4ebc772e4b71e3123d413163d',
'info_dict': {
@@ -115,10 +115,7 @@ class OneUPIE(IGNIE):
'title': 'Sniper Elite V2 - Trailer',
'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf',
}
}
# Override IGN tests
_TESTS = []
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
+34 -22
View File
@@ -11,16 +11,15 @@ from ..utils import (
class InfoQIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'
_TEST = {
"name": "InfoQ",
"url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
"file": "12-jan-pythonthings.mp4",
"info_dict": {
"description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
"title": "A Few of My Favorite [Python] Things",
},
"params": {
"skip_download": True,
'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
'info_dict': {
'id': '12-jan-pythonthings',
'ext': 'mp4',
'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
'title': 'A Few of My Favorite [Python] Things',
},
}
@@ -30,26 +29,39 @@ class InfoQIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
video_description = self._html_search_meta('description', webpage, 'description')
# The server URL is hardcoded
video_url = 'rtmpe://video.infoq.com/cfx/st/'
# Extract video URL
encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id')
encoded_id = self._search_regex(
r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id')
real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id
playpath = 'mp4:' + real_id
# Extract title
video_title = self._search_regex(r'contentTitle = "(.*?)";',
webpage, 'title')
# Extract description
video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
webpage, 'description', fatal=False)
video_filename = video_url.split('/')[-1]
video_filename = playpath.split('/')[-1]
video_id, extension = video_filename.split('.')
http_base = self._search_regex(
r'EXPRESSINSTALL_SWF\s*=\s*"(https?://[^/"]+/)', webpage,
'HTTP base URL')
formats = [{
'format_id': 'rtmp',
'url': video_url,
'ext': extension,
'play_path': playpath,
}, {
'format_id': 'http',
'url': http_base + real_id,
}]
self._sort_formats(formats)
return {
'id': video_id,
'url': video_url,
'title': video_title,
'ext': extension, # Extension is always(?) mp4, but seems to be flv
'description': video_description,
'formats': formats,
}
+1 -1
View File
@@ -14,7 +14,7 @@ class JukeboxIE(InfoExtractor):
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
_TEST = {
'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
'md5': '5dc6477e74b1e37042ac5acedd8413e5',
'md5': '1574e9b4d6438446d5b7dbcdf2786276',
'info_dict': {
'id': 'r303r',
'ext': 'flv',
+11 -11
View File
@@ -1,15 +1,18 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class MDRIE(InfoExtractor):
_VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*'
_VALID_URL = r'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)'
# No tests, MDR regularily deletes its videos
_TEST = {
'url': 'http://www.mdr.de/fakt/video189002.html',
'only_matching': True,
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
@@ -19,9 +22,9 @@ class MDRIE(InfoExtractor):
# determine title and media streams from webpage
html = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h2>(.*?)</h2>', html, u'title')
title = self._html_search_regex(r'<h[12]>(.*?)</h[12]>', html, 'title')
xmlurl = self._search_regex(
r'(/mediathek/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, u'XML URL')
r'dataURL:\'(/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, 'XML URL')
doc = self._download_xml(domain + xmlurl, video_id)
formats = []
@@ -41,7 +44,7 @@ class MDRIE(InfoExtractor):
if vbr_el is None:
format.update({
'vcodec': 'none',
'format_id': u'%s-%d' % (media_type, abr),
'format_id': '%s-%d' % (media_type, abr),
})
else:
vbr = int(vbr_el.text) // 1000
@@ -49,12 +52,9 @@ class MDRIE(InfoExtractor):
'vbr': vbr,
'width': int(a.find('frameWidth').text),
'height': int(a.find('frameHeight').text),
'format_id': u'%s-%d' % (media_type, vbr),
'format_id': '%s-%d' % (media_type, vbr),
})
formats.append(format)
if not formats:
raise ExtractorError(u'Could not find any valid formats')
self._sort_formats(formats)
return {
+45
View File
@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class MoviezineIE(InfoExtractor):
_VALID_URL = r'https?://www\.moviezine\.se/video/(?P<id>[^?#]+)'
_TEST = {
'url': 'http://www.moviezine.se/video/205866',
'info_dict': {
'id': '205866',
'ext': 'mp4',
'title': 'Oculus - Trailer 1',
'description': 'md5:40cc6790fc81d931850ca9249b40e8a4',
'thumbnail': 're:http://.*\.jpg',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player')
formats =[{
'format_id': 'sd',
'url': self._html_search_regex(r'file: "(.+?)",', jsplayer, 'file'),
'quality': 0,
'ext': 'mp4',
}]
self._sort_formats(formats)
return {
'id': video_id,
'title': self._search_regex(r'title: "(.+?)",', jsplayer, 'title'),
'thumbnail': self._search_regex(r'image: "(.+?)",', jsplayer, 'image'),
'formats': formats,
'description': self._og_search_description(webpage),
}
+1 -1
View File
@@ -47,7 +47,7 @@ class NineGagIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
post_view = json.loads(self._html_search_regex(
r'var postView = new app\.PostView\({ post: ({.+?}),', webpage, 'post view'))
r'var postView = new app\.PostView\({\s*post:\s*({.+?}),', webpage, 'post view'))
youtube_id = post_view['videoExternalId']
title = post_view['title']
+67
View File
@@ -0,0 +1,67 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class NRKIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})'
_TESTS = [
{
'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
'md5': '12618eef328c9a35c1b47d5583d9c30d',
'info_dict': {
'id': '150533',
'ext': 'flv',
'title': 'Dompap og andre fugler i Piip-Show',
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f'
}
},
{
'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
'md5': '390b2ce15c0d6aa376ef5059ac9f865e',
'info_dict': {
'id': '154915',
'ext': 'flv',
'title': 'Slik høres internett ut når du er blind',
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
}
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
page = self._download_webpage(url, video_id)
video_id = self._html_search_regex(r'<div class="nrk-video" data-nrk-id="(\d+)">', page, 'video id')
data = self._download_json(
'http://v7.psapi.nrk.no/mediaelement/%s' % video_id, video_id, 'Downloading media JSON')
if data['usageRights']['isGeoBlocked']:
raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected=True)
video_url = data['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124'
images = data.get('images')
if images:
thumbnails = images['webImages']
thumbnails.sort(key=lambda image: image['pixelWidth'])
thumbnail = thumbnails[-1]['imageUrl']
else:
thumbnail = None
return {
'id': video_id,
'url': video_url,
'ext': 'flv',
'title': data['title'],
'description': data['description'],
'thumbnail': thumbnail,
}
+49
View File
@@ -0,0 +1,49 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
class RTBFIE(InfoExtractor):
_VALID_URL = r'https?://www.rtbf.be/video/[^\?]+\?id=(?P<id>\d+)'
_TEST = {
'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
'md5': '799f334ddf2c0a582ba80c44655be570',
'info_dict': {
'id': '1921274',
'ext': 'mp4',
'title': 'Les Diables au coeur (épisode 2)',
'description': 'Football - Diables Rouges',
'duration': 3099,
'timestamp': 1398456336,
'upload_date': '20140425',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
page = self._download_webpage('https://www.rtbf.be/video/embed?id=%s' % video_id, video_id)
data = json.loads(self._html_search_regex(
r'<div class="js-player-embed" data-video="([^"]+)"', page, 'data video'))['data']
video_url = data.get('downloadUrl') or data.get('url')
if data['provider'].lower() == 'youtube':
return self.url_result(video_url, 'Youtube')
return {
'id': video_id,
'url': video_url,
'title': data['title'],
'description': data.get('description') or data.get('subtitle'),
'thumbnail': data['thumbnail']['large'],
'duration': data.get('duration') or data.get('realDuration'),
'timestamp': data['created'],
'view_count': data['viewCount'],
}
+1 -1
View File
@@ -47,7 +47,7 @@ class RutubeIE(InfoExtractor):
author = video.get('author') or {}
options = self._download_json(
'http://rutube.ru/api/play/options/%s/?format=json' %video_id,
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
video_id, 'Downloading options JSON')
m3u8_url = options['video_balancer'].get('m3u8')
+60
View File
@@ -0,0 +1,60 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
class SciVeeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?scivee\.tv/node/(?P<id>\d+)'
_TEST = {
'url': 'http://www.scivee.tv/node/62352',
#'md5': 'b16699b74c9e6a120f6772a44960304f',
'info_dict': {
'id': '62352',
'ext': 'mp4',
'title': 'Adam Arkin at the 2014 DOE JGI Genomics of Energy & Environment Meeting',
'description': 'md5:81f1710638e11a481358fab1b11059d7',
},
'params': {
# Range HTTP header is ignored
'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
# annotations XML is malformed
annotations = self._download_webpage(
'http://www.scivee.tv/assets/annotations/%s' % video_id, video_id, 'Downloading annotations')
title = self._html_search_regex(r'<title>([^<]+)</title>', annotations, 'title')
description = self._html_search_regex(r'<abstract>([^<]+)</abstract>', annotations, 'abstract', fatal=False)
filesize = int_or_none(self._html_search_regex(
r'<filesize>([^<]+)</filesize>', annotations, 'filesize', fatal=False))
formats = [
{
'url': 'http://www.scivee.tv/assets/audio/%s' % video_id,
'ext': 'mp3',
'format_id': 'audio',
},
{
'url': 'http://www.scivee.tv/assets/video/%s' % video_id,
'ext': 'mp4',
'format_id': 'video',
'filesize': filesize,
},
]
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': 'http://www.scivee.tv/assets/videothumb/%s' % video_id,
'formats': formats,
}
+98 -60
View File
@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -8,78 +10,114 @@ from ..utils import (
class SteamIE(InfoExtractor):
_VALID_URL = r"""http://store\.steampowered\.com/
(agecheck/)?
(?P<urltype>video|app)/ #If the page is only for videos or for a game
(?P<gameID>\d+)/?
(?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
"""
_VALID_URL = r"""(?x)
https?://store\.steampowered\.com/
(agecheck/)?
(?P<urltype>video|app)/ #If the page is only for videos or for a game
(?P<gameID>\d+)/?
(?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
|
https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
"""
_VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
_AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
_TEST = {
u"url": u"http://store.steampowered.com/video/105600/",
u"playlist": [
_TESTS = [{
"url": "http://store.steampowered.com/video/105600/",
"playlist": [
{
u"file": u"81300.flv",
u"md5": u"f870007cee7065d7c76b88f0a45ecc07",
u"info_dict": {
u"title": u"Terraria 1.1 Trailer",
u'playlist_index': 1,
"md5": "f870007cee7065d7c76b88f0a45ecc07",
"info_dict": {
'id': '81300',
'ext': 'flv',
"title": "Terraria 1.1 Trailer",
'playlist_index': 1,
}
},
{
u"file": u"80859.flv",
u"md5": u"61aaf31a5c5c3041afb58fb83cbb5751",
u"info_dict": {
u"title": u"Terraria Trailer",
u'playlist_index': 2,
"md5": "61aaf31a5c5c3041afb58fb83cbb5751",
"info_dict": {
'id': '80859',
'ext': 'flv',
"title": "Terraria Trailer",
'playlist_index': 2,
}
}
]
}
@classmethod
def suitable(cls, url):
"""Receives a URL and returns True if suitable for this IE."""
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
],
'params': {
'playlistend': 2,
}
}, {
'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205',
'info_dict': {
'id': 'WB5DvDOOvAY',
'ext': 'mp4',
'upload_date': '20140329',
'title': 'FRONTIERS - Final Greenlight Trailer',
'description': "The final trailer for the Steam Greenlight launch. Hooray, progress! Here's the official Greenlight page: http://steamcommunity.com/sharedfiles/filedetails/?id=242472205",
'uploader': 'AAD Productions',
'uploader_id': 'AtomicAgeDogGames',
}
}]
def _real_extract(self, url):
m = re.match(self._VALID_URL, url, re.VERBOSE)
gameID = m.group('gameID')
videourl = self._VIDEO_PAGE_TEMPLATE % gameID
webpage = self._download_webpage(videourl, gameID)
m = re.match(self._VALID_URL, url)
fileID = m.group('fileID')
if fileID:
videourl = url
playlist_id = fileID
else:
gameID = m.group('gameID')
playlist_id = gameID
videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id
webpage = self._download_webpage(videourl, playlist_id)
if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
videourl = self._AGECHECK_TEMPLATE % gameID
videourl = self._AGECHECK_TEMPLATE % playlist_id
self.report_age_confirmation()
webpage = self._download_webpage(videourl, gameID)
webpage = self._download_webpage(videourl, playlist_id)
self.report_extraction(gameID)
game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>',
webpage, 'game title')
if fileID:
playlist_title = self._html_search_regex(
r'<div class="workshopItemTitle">(.+)</div>', webpage, 'title')
mweb = re.finditer(r'''(?x)
'movie_(?P<videoID>[0-9]+)':\s*\{\s*
YOUTUBE_VIDEO_ID:\s*"(?P<youtube_id>[^"]+)",
''', webpage)
videos = [{
'_type': 'url',
'url': vid.group('youtube_id'),
'ie_key': 'Youtube',
} for vid in mweb]
else:
playlist_title = self._html_search_regex(
r'<h2 class="pageheader">(.*?)</h2>', webpage, 'game title')
urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
mweb = re.finditer(urlRE, webpage)
namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
titles = re.finditer(namesRE, webpage)
thumbsRE = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">'
thumbs = re.finditer(thumbsRE, webpage)
videos = []
for vid,vtitle,thumb in zip(mweb,titles,thumbs):
video_id = vid.group('videoID')
title = vtitle.group('videoName')
video_url = vid.group('videoURL')
video_thumb = thumb.group('thumbnail')
if not video_url:
raise ExtractorError(u'Cannot find video url for %s' % video_id)
info = {
'id':video_id,
'url':video_url,
'ext': 'flv',
'title': unescapeHTML(title),
'thumbnail': video_thumb
}
videos.append(info)
return [self.playlist_result(videos, gameID, game_title)]
mweb = re.finditer(r'''(?x)
'movie_(?P<videoID>[0-9]+)':\s*\{\s*
FILENAME:\s*"(?P<videoURL>[\w:/\.\?=]+)"
(,\s*MOVIE_NAME:\s*\"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},
''', webpage)
titles = re.finditer(
r'<span class="title">(?P<videoName>.+?)</span>', webpage)
thumbs = re.finditer(
r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">', webpage)
videos = []
for vid, vtitle, thumb in zip(mweb, titles, thumbs):
video_id = vid.group('videoID')
title = vtitle.group('videoName')
video_url = vid.group('videoURL')
video_thumb = thumb.group('thumbnail')
if not video_url:
raise ExtractorError('Cannot find video url for %s' % video_id)
videos.append({
'id': video_id,
'url': video_url,
'ext': 'flv',
'title': unescapeHTML(title),
'thumbnail': video_thumb
})
if not videos:
raise ExtractorError('Could not find any videos')
return self.playlist_result(videos, playlist_id, playlist_title)
+24 -4
View File
@@ -6,9 +6,9 @@ from .common import InfoExtractor
class SyfyIE(InfoExtractor):
_VALID_URL = r'https?://www\.syfy\.com/videos/.+?vid:(?P<id>\d+)'
_VALID_URL = r'https?://www\.syfy\.com/(?:videos/.+?vid:(?P<id>[0-9]+)|(?!videos)(?P<video_name>[^/]+)(?:$|[?#]))'
_TEST = {
_TESTS = [{
'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458',
'md5': 'e07de1d52c7278adbb9b9b1c93a66849',
'info_dict': {
@@ -18,10 +18,30 @@ class SyfyIE(InfoExtractor):
'description': 'Listen to what insights George Lucas give his daughter Amanda.',
},
'add_ie': ['ThePlatform'],
}
}, {
'url': 'http://www.syfy.com/wilwheaton',
'md5': '94dfa54ee3ccb63295b276da08c415f6',
'info_dict': {
'id': '4yoffOOXC767',
'ext': 'flv',
'title': 'The Wil Wheaton Project - Premiering May 27th at 10/9c.',
'description': 'The Wil Wheaton Project premieres May 27th at 10/9c. Don\'t miss it.',
},
'add_ie': ['ThePlatform'],
'skip': 'Blocked outside the US',
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_name = mobj.group('video_name')
if video_name:
generic_webpage = self._download_webpage(url, video_name)
video_id = self._search_regex(
r'<iframe.*?class="video_iframe_page"\s+src="/_utils/video/thP_video_controller.php.*?_vid([0-9]+)">',
generic_webpage, 'video ID')
url = 'http://www.syfy.com/videos/%s/%s/vid:%s' % (
video_name, video_name, video_id)
else:
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
return self.url_result(self._og_search_video_url(webpage))
-3
View File
@@ -3,9 +3,6 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class TeamcocoIE(InfoExtractor):
+9 -9
View File
@@ -51,16 +51,13 @@ class TEDIE(SubtitlesInfoExtractor):
}
}, {
'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
'md5': '49144e345a899b8cb34d315f3b9cfeeb',
'info_dict': {
'id': '1972',
'ext': 'flv',
'ext': 'mp4',
'title': 'Be passionate. Be courageous. Be your best.',
'uploader': 'Gabby Giffords and Mark Kelly',
'description': 'md5:d89e1d8ebafdac8e55df4c219ecdbfe9',
},
'params': {
# rtmp download
'skip_download': True,
'description': 'md5:5174aed4d0f16021b704120360f72b92',
},
}]
@@ -97,7 +94,7 @@ class TEDIE(SubtitlesInfoExtractor):
playlist_info = info['playlist']
playlist_entries = [
self.url_result(u'http://www.ted.com/talks/' + talk['slug'], self.ie_key())
self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
for talk in info['talks']
]
return self.playlist_result(
@@ -163,7 +160,7 @@ class TEDIE(SubtitlesInfoExtractor):
sub_lang_list[l] = url
return sub_lang_list
else:
self._downloader.report_warning(u'video doesn\'t have subtitles')
self._downloader.report_warning('video doesn\'t have subtitles')
return {}
def _watch_info(self, url, name):
@@ -178,7 +175,10 @@ class TEDIE(SubtitlesInfoExtractor):
title = self._html_search_regex(
r"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>", webpage, 'title')
description = self._html_search_regex(
r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
[
r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
r'(?s)<p><strong>About this talk:</strong>\s+(.*?)</p>',
],
webpage, 'description', fatal=False)
return {
+1 -1
View File
@@ -52,7 +52,7 @@ class ThePlatformIE(InfoExtractor):
head = meta.find(_x('smil:head'))
body = meta.find(_x('smil:body'))
f4m_node = body.find(_x('smil:seq/smil:video'))
f4m_node = body.find(_x('smil:seq//smil:video'))
if f4m_node is not None:
f4m_url = f4m_node.attrib['src']
if 'manifest.f4m?' not in f4m_url:
+51 -21
View File
@@ -17,10 +17,38 @@ from ..utils import (
RegexNotFoundError,
std_headers,
unsmuggle_url,
urlencode_postdata,
)
class VimeoIE(SubtitlesInfoExtractor):
class VimeoBaseInfoExtractor(InfoExtractor):
_NETRC_MACHINE = 'vimeo'
_LOGIN_REQUIRED = False
def _login(self):
(username, password) = self._get_login_info()
if username is None:
if self._LOGIN_REQUIRED:
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
return
self.report_login()
login_url = 'https://vimeo.com/log_in'
webpage = self._download_webpage(login_url, None, False)
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
data = urlencode_postdata({
'email': username,
'password': password,
'action': 'login',
'service': 'vimeo',
'token': token,
})
login_request = compat_urllib_request.Request(login_url, data)
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
login_request.add_header('Cookie', 'xsrft=%s' % token)
self._download_webpage(login_request, None, False, 'Wrong login info')
class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
@@ -33,7 +61,6 @@ class VimeoIE(SubtitlesInfoExtractor):
(?:videos?/)?
(?P<id>[0-9]+)
/?(?:[?&].*)?(?:[#].*)?$'''
_NETRC_MACHINE = 'vimeo'
IE_NAME = 'vimeo'
_TESTS = [
{
@@ -111,25 +138,6 @@ class VimeoIE(SubtitlesInfoExtractor):
else:
return super(VimeoIE, cls).suitable(url)
def _login(self):
(username, password) = self._get_login_info()
if username is None:
return
self.report_login()
login_url = 'https://vimeo.com/log_in'
webpage = self._download_webpage(login_url, None, False)
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
data = compat_urllib_parse.urlencode({'email': username,
'password': password,
'action': 'login',
'service': 'vimeo',
'token': token,
})
login_request = compat_urllib_request.Request(login_url, data)
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
login_request.add_header('Cookie', 'xsrft=%s' % token)
self._download_webpage(login_request, None, False, 'Wrong login info')
def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword', None)
if password is None:
@@ -438,3 +446,25 @@ class VimeoReviewIE(InfoExtractor):
video_id = mobj.group('id')
player_url = 'https://player.vimeo.com/player/' + video_id
return self.url_result(player_url, 'Vimeo', video_id)
class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
IE_NAME = 'vimeo:watchlater'
IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
_VALID_URL = r'https?://vimeo\.com/home/watchlater|:vimeowatchlater'
_LOGIN_REQUIRED = True
_TITLE_RE = r'href="/home/watchlater".*?>(.*?)<'
def _real_initialize(self):
self._login()
def _page_url(self, base_url, pagenum):
url = '%s/page:%d/' % (base_url, pagenum)
request = compat_urllib_request.Request(url)
# Set the header to get a partial html page with the ids,
# the normal page doesn't contain them.
request.add_header('X-Requested-With', 'XMLHttpRequest')
return request
def _real_extract(self, url):
return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater')
+66
View File
@@ -0,0 +1,66 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
parse_duration,
qualities,
)
class VuClipIE(InfoExtractor):
_VALID_URL = r'http://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://m.vuclip.com/w?cid=843902317&fid=63532&z=1007&nvar&frm=index.html&bu=4757321434',
'md5': '92ac9d1ccefec4f0bb474661ab144fcf',
'info_dict': {
'id': '843902317',
'ext': '3gp',
'title': 'Movie Trailer: Noah',
'duration': 139,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
ad_m = re.search(
r'''value="No.*?" onClick="location.href='([^"']+)'"''', webpage)
if ad_m:
urlr = compat_urllib_parse_urlparse(url)
adfree_url = urlr.scheme + '://' + urlr.netloc + ad_m.group(1)
webpage = self._download_webpage(
adfree_url, video_id, note='Download post-ad page')
links_code = self._search_regex(
r'(?s)<div class="social align_c".*?>(.*?)<hr\s*/?>', webpage,
'links')
title = self._html_search_regex(
r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip()
quality_order = qualities(['Reg', 'Hi'])
formats = []
for url, q in re.findall(
r'<a href="(?P<url>[^"]+)".*?>(?P<q>[^<]+)</a>', links_code):
format_id = compat_urllib_parse_urlparse(url).scheme + '-' + q
formats.append({
'format_id': format_id,
'url': url,
'quality': quality_order(q),
})
self._sort_formats(formats)
duration = parse_duration(self._search_regex(
r'\(([0-9:]+)\)</span></h1>', webpage, 'duration', fatal=False))
return {
'id': video_id,
'formats': formats,
'title': title,
'duration': duration,
}
+28 -14
View File
@@ -14,8 +14,8 @@ from ..utils import (
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen'
_VALID_URL = r'https?://screen\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'
IE_DESC = 'Yahoo screen and movies'
_VALID_URL = r'https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'
_TESTS = [
{
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
@@ -37,6 +37,16 @@ class YahooIE(InfoExtractor):
'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
},
},
{
'url': 'https://movies.yahoo.com/video/world-loves-spider-man-190819223.html',
'md5': '410b7104aa9893b765bc22787a22f3d9',
'info_dict': {
'id': '516ed8e2-2c4f-339f-a211-7a8b49d30845',
'ext': 'mp4',
'title': 'The World Loves Spider-Man',
'description': '''People all over the world are celebrating the release of \"The Amazing Spider-Man 2.\" We're taking a look at the enthusiastic response Spider-Man has received from viewers all over the world.''',
}
}
]
def _real_extract(self, url):
@@ -44,13 +54,20 @@ class YahooIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
items_json = self._search_regex(r'mediaItems: ({.*?})$',
webpage, 'items', flags=re.MULTILINE)
items = json.loads(items_json)
info = items['mediaItems']['query']['results']['mediaObj'][0]
# The 'meta' field is not always in the video webpage, we request it
# from another page
long_id = info['id']
items_json = self._search_regex(
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
default=None)
if items_json is None:
long_id = self._search_regex(
r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
webpage, 'content ID')
video_id = long_id
else:
items = json.loads(items_json)
info = items['mediaItems']['query']['results']['mediaObj'][0]
# The 'meta' field is not always in the video webpage, we request it
# from another page
long_id = info['id']
return self._get_info(long_id, video_id)
def _get_info(self, long_id, video_id):
@@ -104,7 +121,7 @@ class YahooNewsIE(YahooIE):
IE_NAME = 'yahoo:news'
_VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html'
_TEST = {
_TESTS = [{
'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
'md5': '67010fdf3a08d290e060a4dd96baa07b',
'info_dict': {
@@ -113,10 +130,7 @@ class YahooNewsIE(YahooIE):
'title': 'China Moses Is Crazy About the Blues',
'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
},
}
# Overwrite YahooIE properties we don't want
_TESTS = []
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
+24 -20
View File
@@ -210,23 +210,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
# Dash webm
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH webm', 'preference': -40},
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH webm', 'preference': -40},
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH webm', 'preference': -40},
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH webm', 'preference': -40},
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
# Dash webm audio
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50},
'172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 256, 'preference': -50},
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
'172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
# RTMP (unnamed)
'_rtmp': {'protocol': 'rtmp'},
@@ -252,7 +252,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u"info_dict": {
u"upload_date": u"20120506",
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
u"description": u"md5:5b292926389560516e384ac437c0ec07",
u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f",
u"uploader": u"Icona Pop",
u"uploader_id": u"IconaPop"
}
@@ -304,7 +304,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u'id': u'IB3lcPjvWLA',
u'ext': u'm4a',
u'title': u'Afrojack - The Spark ft. Spree Wilson',
u'description': u'md5:3199ed45ee8836572865580804d7ac0f',
u'description': u'md5:9717375db5a9a3992be4668bbf3bc0a8',
u'uploader': u'AfrojackVEVO',
u'uploader_id': u'AfrojackVEVO',
u'upload_date': u'20131011',
@@ -1082,9 +1082,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
break
if 'token' not in video_info:
if 'reason' in video_info:
raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
raise ExtractorError(
u'YouTube said: %s' % video_info['reason'][0],
expected=True, video_id=video_id)
else:
raise ExtractorError(u'"token" parameter not in video info for unknown reason')
raise ExtractorError(
u'"token" parameter not in video info for unknown reason',
video_id=video_id)
if 'view_count' in video_info:
view_count = int(video_info['view_count'][0])
@@ -1113,7 +1117,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# title
if 'title' in video_info:
video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
video_title = video_info['title'][0]
else:
self._downloader.report_warning(u'Unable to extract video title')
video_title = u'_'
+35 -9
View File
@@ -594,13 +594,15 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
class ExtractorError(Exception):
"""Error during info extraction."""
def __init__(self, msg, tb=None, expected=False, cause=None):
def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
""" tb, if given, is the original traceback (so that it can be printed out).
If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
"""
if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
expected = True
if video_id is not None:
msg = video_id + ': ' + msg
if not expected:
msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
super(ExtractorError, self).__init__(msg)
@@ -608,6 +610,7 @@ class ExtractorError(Exception):
self.traceback = tb
self.exc_info = sys.exc_info() # preserve original exception
self.cause = cause
self.video_id = video_id
def format_traceback(self):
if self.traceback is None:
@@ -923,9 +926,6 @@ def _windows_write_string(s, out):
2: -12,
}
def ucs2_len(s):
return sum((2 if ord(c) > 0xffff else 1) for c in s)
fileno = out.fileno()
if fileno not in WIN_OUTPUT_IDS:
return False
@@ -959,13 +959,25 @@ def _windows_write_string(s, out):
if not_a_console(h):
return False
remaining = ucs2_len(s)
while remaining > 0:
def next_nonbmp_pos(s):
try:
return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
except StopIteration:
return len(s)
while s:
count = min(next_nonbmp_pos(s), 1024)
ret = WriteConsoleW(
h, s, min(remaining, 1024), ctypes.byref(written), None)
h, s, count if count else 2, ctypes.byref(written), None)
if ret == 0:
raise OSError('Failed to write string')
remaining -= written.value
if not count: # We just wrote a non-BMP character
assert written.value == 2
s = s[1:]
else:
assert written.value > 0
s = s[written.value:]
return True
@@ -1236,7 +1248,10 @@ class HEADRequest(compat_urllib_request.Request):
return "HEAD"
def int_or_none(v, scale=1, default=None):
def int_or_none(v, scale=1, default=None, get_attr=None):
if get_attr:
if v is not None:
v = getattr(v, get_attr, None)
return default if v is None else (int(v) // scale)
@@ -1397,3 +1412,14 @@ US_RATINGS = {
def strip_jsonp(code):
return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)
def qualities(quality_ids):
""" Get a numeric quality value out of a list of possible values """
def q(qid):
try:
return quality_ids.index(qid)
except ValueError:
return -1
return q
+1 -1
View File
@@ -1,2 +1,2 @@
__version__ = '2014.04.19'
__version__ = '2014.04.30'