release 2014.04.30

Merge branch 'master' of github.com:rg3/youtube-dl
[YoutubeDL] Add simple tests for format_note (Closes #2825 )
2026-06-12 07:30:09 +00:00 · 2014-04-30 02:05:03 +02:00 · 2014-04-30 02:04:55 +02:00 · 2014-04-30 02:02:41 +02:00 · 2014-04-30 01:52:05 +02:00 · 2014-04-30 01:50:33 +02:00
63 changed files with 1790 additions and 523 deletions
@@ -250,6 +250,7 @@ which means you can modify it, redistribute it or use it however you like.
                                     default
    --embed-subs                     embed subtitles in the video (only for mp4
                                     videos)
+    --embed-thumbnail                embed thumbnail in the audio as cover art
    --add-metadata                   write metadata to the video file
    --xattrs                         write metadata to the video file's xattrs
                                     (using dublin core and xdg standards)
@@ -74,13 +74,19 @@ class FakeYDL(YoutubeDL):
            old_report_warning(message)
        self.report_warning = types.MethodType(report_warning, self)

-def gettestcases():
+
+def gettestcases(include_onlymatching=False):
    for ie in youtube_dl.extractor.gen_extractors():
        t = getattr(ie, '_TEST', None)
        if t:
-            t['name'] = type(ie).__name__[:-len('IE')]
-            yield t
-        for t in getattr(ie, '_TESTS', []):
+            assert not hasattr(ie, '_TESTS'), \
+                '%s has _TEST and _TESTS' % type(ie).__name__
+            tests = [t]
+        else:
+            tests = getattr(ie, '_TESTS', [])
+        for t in tests:
+            if not include_onlymatching and t.get('only_matching', False):
+                continue
            t['name'] = type(ie).__name__[:-len('IE')]
            yield t

@@ -128,3 +134,17 @@ def expect_info_dict(self, expected_dict, got_dict):
            missing_keys,
            'Missing keys in test definition: %s' % (
                ', '.join(sorted(missing_keys))))
+
+
+def assertRegexpMatches(self, text, regexp, msg=None):
+    if hasattr(self, 'assertRegexpMatches'):
+        return self.assertRegexpMatches(text, regexp, msg)
+    else:
+        m = re.match(regexp, text)
+        if not m:
+            note = 'Regexp didn\'t match: %r not found in %r' % (regexp, text)
+            if msg is None:
+                msg = note
+            else:
+                msg = note + ', ' + msg
+            self.assertTrue(m, msg)
@@ -8,7 +8,7 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-from test.helper import FakeYDL
+from test.helper import FakeYDL, assertRegexpMatches
 from youtube_dl import YoutubeDL
 from youtube_dl.extractor import YoutubeIE

@@ -274,6 +274,12 @@ class TestFormatSelection(unittest.TestCase):
        # Replace missing fields with 'NA'
        self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')

+    def test_format_note(self):
+        ydl = YoutubeDL()
+        self.assertEqual(ydl._format_note({}), '')
+        assertRegexpMatches(self, ydl._format_note({
+            'vbr': 10,
+        }), '^x\s*10k$')

 if __name__ == '__main__':
    unittest.main()
@@ -49,6 +49,7 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
        self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
        self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
+        self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])

    def test_youtube_channel_matching(self):
        assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
@@ -76,20 +77,20 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])

    def test_justin_tv_channelid_matching(self):
-        self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
-        self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
-        self.assertTrue(JustinTVIE.suitable(u"www.justin.tv/vanillatv"))
-        self.assertTrue(JustinTVIE.suitable(u"www.twitch.tv/vanillatv"))
-        self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv"))
-        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv"))
-        self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv/"))
-        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/"))
+        self.assertTrue(JustinTVIE.suitable('justin.tv/vanillatv'))
+        self.assertTrue(JustinTVIE.suitable('twitch.tv/vanillatv'))
+        self.assertTrue(JustinTVIE.suitable('www.justin.tv/vanillatv'))
+        self.assertTrue(JustinTVIE.suitable('www.twitch.tv/vanillatv'))
+        self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv'))
+        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv'))
+        self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv/'))
+        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/'))

    def test_justintv_videoid_matching(self):
-        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/b/328087483"))
+        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/b/328087483'))

    def test_justin_tv_chapterid_matching(self):
-        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
+        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361'))

    def test_youtube_extract(self):
        assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
@@ -105,7 +106,7 @@ class TestAllURLsMatching(unittest.TestCase):

    def test_no_duplicates(self):
        ies = gen_extractors()
-        for tc in gettestcases():
+        for tc in gettestcases(include_onlymatching=True):
            url = tc['url']
            for ie in ies:
                if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
@@ -156,6 +157,18 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch(
            'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
+            ['ComedyCentralShows'])

    def test_yahoo_https(self):
        # https://github.com/rg3/youtube-dl/issues/2701
@@ -163,5 +176,6 @@ class TestAllURLsMatching(unittest.TestCase):
            'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
            ['Yahoo'])

+
 if __name__ == '__main__':
    unittest.main()
@@ -43,6 +43,7 @@ from youtube_dl.extractor import (
    XTubeUserIE,
    InstagramUserIE,
    CSpanIE,
+    AolIE,
 )


@@ -191,8 +192,8 @@ class TestPlaylists(unittest.TestCase):
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], 'dezhurnyi_angel')
        self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)')
-        self.assertTrue(len(result['entries']) >= 36)
-        
+        self.assertTrue(len(result['entries']) >= 23)
+
    def test_ivi_compilation_season(self):
        dl = FakeYDL()
        ie = IviCompilationIE(dl)
@@ -200,7 +201,7 @@ class TestPlaylists(unittest.TestCase):
        self.assertIsPlaylist(result)
        self.assertEqual(result['id'], 'dezhurnyi_angel/season2')
        self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 2 сезон')
-        self.assertTrue(len(result['entries']) >= 20)
+        self.assertTrue(len(result['entries']) >= 7)
        
    def test_imdb_list(self):
        dl = FakeYDL()
@@ -327,6 +328,16 @@ class TestPlaylists(unittest.TestCase):
        whole_duration = sum(e['duration'] for e in result['entries'])
        self.assertEqual(whole_duration, 14855)

+    def test_aol_playlist(self):
+        dl = FakeYDL()
+        ie = AolIE(dl)
+        result = ie.extract(
+            'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], '152147')
+        self.assertEqual(
+            result['title'], 'Brace Yourself - Today\'s Weirdest News')
+        self.assertTrue(len(result['entries']) >= 10)

 if __name__ == '__main__':
    unittest.main()
@@ -181,7 +181,7 @@ class TestTedSubtitles(BaseTestSubtitles):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
-        self.assertEqual(len(subtitles.keys()), 28)
+        self.assertTrue(len(subtitles.keys()) >= 28)

    def test_list_subtitles(self):
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
@@ -287,7 +287,7 @@ class YoutubeDL(object):
        return self.to_stdout(message, skip_eol, check_quiet=True)

    def _write_string(self, s, out=None):
-        write_string(s, out=out, encoding=self.get_encoding())
+        write_string(s, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode."""
@@ -936,7 +936,7 @@ class YoutubeDL(object):
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                                subfile.write(sub)
                except (OSError, IOError):
-                    self.report_error('Cannot write subtitles file ' + descfn)
+                    self.report_error('Cannot write subtitles file ' + sub_filename)
                    return

        if self.params.get('writeinfojson', False):
@@ -1139,57 +1139,57 @@ class YoutubeDL(object):
            res = default
        return res

-    def list_formats(self, info_dict):
-        def format_note(fdict):
-            res = ''
-            if fdict.get('ext') in ['f4f', 'f4m']:
-                res += '(unsupported) '
-            if fdict.get('format_note') is not None:
-                res += fdict['format_note'] + ' '
-            if fdict.get('tbr') is not None:
-                res += '%4dk ' % fdict['tbr']
-            if fdict.get('container') is not None:
-                if res:
-                    res += ', '
-                res += '%s container' % fdict['container']
-            if (fdict.get('vcodec') is not None and
-                    fdict.get('vcodec') != 'none'):
-                if res:
-                    res += ', '
-                res += fdict['vcodec']
-                if fdict.get('vbr') is not None:
-                    res += '@'
-            elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
-                res += 'video@'
+    def _format_note(self, fdict):
+        res = ''
+        if fdict.get('ext') in ['f4f', 'f4m']:
+            res += '(unsupported) '
+        if fdict.get('format_note') is not None:
+            res += fdict['format_note'] + ' '
+        if fdict.get('tbr') is not None:
+            res += '%4dk ' % fdict['tbr']
+        if fdict.get('container') is not None:
+            if res:
+                res += ', '
+            res += '%s container' % fdict['container']
+        if (fdict.get('vcodec') is not None and
+                fdict.get('vcodec') != 'none'):
+            if res:
+                res += ', '
+            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
-                res += '%4dk' % fdict['vbr']
-            if fdict.get('acodec') is not None:
-                if res:
-                    res += ', '
-                if fdict['acodec'] == 'none':
-                    res += 'video only'
-                else:
-                    res += '%-5s' % fdict['acodec']
-            elif fdict.get('abr') is not None:
-                if res:
-                    res += ', '
-                res += 'audio'
-            if fdict.get('abr') is not None:
-                res += '@%3dk' % fdict['abr']
-            if fdict.get('asr') is not None:
-                res += ' (%5dHz)' % fdict['asr']
-            if fdict.get('filesize') is not None:
-                if res:
-                    res += ', '
-                res += format_bytes(fdict['filesize'])
-            return res
+                res += '@'
+        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
+            res += 'video@'
+        if fdict.get('vbr') is not None:
+            res += '%4dk' % fdict['vbr']
+        if fdict.get('acodec') is not None:
+            if res:
+                res += ', '
+            if fdict['acodec'] == 'none':
+                res += 'video only'
+            else:
+                res += '%-5s' % fdict['acodec']
+        elif fdict.get('abr') is not None:
+            if res:
+                res += ', '
+            res += 'audio'
+        if fdict.get('abr') is not None:
+            res += '@%3dk' % fdict['abr']
+        if fdict.get('asr') is not None:
+            res += ' (%5dHz)' % fdict['asr']
+        if fdict.get('filesize') is not None:
+            if res:
+                res += ', '
+            res += format_bytes(fdict['filesize'])
+        return res

+    def list_formats(self, info_dict):
        def line(format, idlen=20):
            return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
                format['format_id'],
                format['ext'],
                self.format_resolution(format),
-                format_note(format),
+                self._format_note(format),
            ))

        formats = info_dict.get('formats', [info_dict])
@@ -1197,8 +1197,8 @@ class YoutubeDL(object):
                    max(len(f['format_id']) for f in formats))
        formats_s = [line(f, idlen) for f in formats]
        if len(formats) > 1:
-            formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
-            formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
+            formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
+            formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

        header_line = line({
            'format_id': 'format code', 'ext': 'extension',
@@ -92,6 +92,8 @@ from .extractor import gen_extractors
 from .version import __version__
 from .YoutubeDL import YoutubeDL
 from .postprocessor import (
+    AtomicParsleyPP,
+    FFmpegAudioFixPP,
    FFmpegMetadataPP,
    FFmpegVideoConvertor,
    FFmpegExtractAudioPP,
@@ -503,6 +505,8 @@ def parseOpts(overrideArguments=None):
            help='do not overwrite post-processed files; the post-processed files are overwritten by default')
    postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
            help='embed subtitles in the video (only for mp4 videos)')
+    postproc.add_option('--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False,
+            help='embed thumbnail in the audio as cover art')
    postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
            help='write metadata to the video file')
    postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False,
@@ -808,6 +812,10 @@ def _real_main(argv=None):
            ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
        if opts.xattrs:
            ydl.add_post_processor(XAttrMetadataPP())
+        if opts.embedthumbnail:
+            if not opts.addmetadata:
+                ydl.add_post_processor(FFmpegAudioFixPP())
+            ydl.add_post_processor(AtomicParsleyPP())

        # Update version
        if opts.update_self:
@@ -14,6 +14,8 @@ from ..utils import (


 class HttpFD(FileDownloader):
+    _TEST_FILE_SIZE = 10241
+
    def real_download(self, filename, info_dict):
        url = info_dict['url']
        tmpfilename = self.temp_name(filename)
@@ -28,8 +30,10 @@ class HttpFD(FileDownloader):
        basic_request = compat_urllib_request.Request(url, None, headers)
        request = compat_urllib_request.Request(url, None, headers)

-        if self.params.get('test', False):
-            request.add_header('Range', 'bytes=0-10240')
+        is_test = self.params.get('test', False)
+
+        if is_test:
+            request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
@@ -100,6 +104,15 @@ class HttpFD(FileDownloader):
            return False

        data_len = data.info().get('Content-length', None)
+
+        # Range HTTP header may be ignored/unsupported by a webserver
+        # (e.g. extractor/scivee.py, extractor/bambuser.py).
+        # However, for a test we still would like to download just a piece of a file.
+        # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
+        # block size when downloading a file.
+        if is_test and data_len > self._TEST_FILE_SIZE:
+            data_len = self._TEST_FILE_SIZE
+
        if data_len is not None:
            data_len = int(data_len) + resume_len
            min_data_len = self.params.get("min_filesize", None)
@@ -118,7 +131,7 @@ class HttpFD(FileDownloader):
        while True:
            # Download and write
            before = time.time()
-            data_block = data.read(block_size)
+            data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
            after = time.time()
            if len(data_block) == 0:
                break
@@ -162,6 +175,9 @@ class HttpFD(FileDownloader):
                'speed': speed,
            })

+            if is_test and byte_counter == data_len:
+                break
+
            # Apply rate limit
            self.slow_down(start, byte_counter - resume_len)

@@ -20,6 +20,7 @@ from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
+from .bilibili import BiliBiliIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
@@ -40,6 +41,7 @@ from .cinemassacre import CinemassacreIE
 from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
 from .clipsyndicate import ClipsyndicateIE
+from .clubic import ClubicIE
 from .cmt import CMTIE
 from .cnet import CNETIE
 from .cnn import (
@@ -158,6 +160,7 @@ from .mofosex import MofosexIE
 from .mooshare import MooshareIE
 from .morningstar import MorningstarIE
 from .motorsport import MotorsportIE
+from .moviezine import MoviezineIE
 from .movshare import MovShareIE
 from .mtv import (
    MTVIE,
@@ -181,10 +184,12 @@ from .nfb import NFBIE
 from .nhl import NHLIE, NHLVideocenterIE
 from .niconico import NiconicoIE
 from .ninegag import NineGagIE
+from .noco import NocoIE
 from .normalboots import NormalbootsIE
 from .novamov import NovaMovIE
 from .nowness import NownessIE
 from .nowvideo import NowVideoIE
+from .nrk import NRKIE
 from .ntv import NTVIE
 from .oe1 import OE1IE
 from .ooyala import OoyalaIE
@@ -206,6 +211,7 @@ from .ringtv import RingTVIE
 from .ro220 import Ro220IE
 from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
+from .rtbf import RTBFIE
 from .rtlnow import RTLnowIE
 from .rts import RTSIE
 from .rtve import RTVEALaCartaIE
@@ -217,6 +223,7 @@ from .rutube import (
 )
 from .rutv import RUTVIE
 from .savefrom import SaveFromIE
+from .scivee import SciVeeIE
 from .servingsys import ServingSysIE
 from .sina import SinaIE
 from .slideshare import SlideshareIE
@@ -251,6 +258,7 @@ from .tf1 import TF1IE
 from .theplatform import ThePlatformIE
 from .thisav import ThisAVIE
 from .tinypic import TinyPicIE
+from .tlc import TlcIE, TlcDeIE
 from .toutv import TouTvIE
 from .toypics import ToypicsUserIE, ToypicsIE
 from .traileraddict import TrailerAddictIE
@@ -288,11 +296,13 @@ from .vimeo import (
    VimeoAlbumIE,
    VimeoGroupsIE,
    VimeoReviewIE,
+    VimeoWatchLaterIE,
 )
 from .vine import VineIE
 from .viki import VikiIE
 from .vk import VKIE
 from .vube import VubeIE
+from .vuclip import VuClipIE
 from .washingtonpost import WashingtonPostIE
 from .wat import WatIE
 from .wdr import (
@@ -8,7 +8,18 @@ from .fivemin import FiveMinIE

 class AolIE(InfoExtractor):
    IE_NAME = 'on.aol.com'
-    _VALID_URL = r'http://on\.aol\.com/video/.*-(?P<id>\d+)($|\?)'
+    _VALID_URL = r'''(?x)
+        (?:
+            aol-video:|
+            http://on\.aol\.com/
+            (?:
+                video/.*-|
+                playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid=
+            )
+        )
+        (?P<id>[0-9]+)
+        (?:$|\?)
+    '''

    _TEST = {
        'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
@@ -24,5 +35,31 @@ class AolIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        self.to_screen('Downloading 5min.com video %s' % video_id)
+
+        playlist_id = mobj.group('playlist_id')
+        if playlist_id and not self._downloader.params.get('noplaylist'):
+            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+
+            webpage = self._download_webpage(url, playlist_id)
+            title = self._html_search_regex(
+                r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
+            playlist_html = self._search_regex(
+                r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
+                'playlist HTML')
+            entries = [{
+                '_type': 'url',
+                'url': 'aol-video:%s' % m.group('id'),
+                'ie_key': 'Aol',
+            } for m in re.finditer(
+                r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
+                playlist_html)]
+
+            return {
+                '_type': 'playlist',
+                'id': playlist_id,
+                'display_id': mobj.group('playlist_display_id'),
+                'title': title,
+                'entries': entries,
+            }
+
        return FiveMinIE._build_result(video_id)
@@ -74,7 +74,8 @@ class ArteTVPlus7IE(InfoExtractor):
        return self._extract_from_webpage(webpage, video_id, lang)

    def _extract_from_webpage(self, webpage, video_id, lang):
-        json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
+        json_url = self._html_search_regex(
+            r'arte_vp_url="(.*?)"', webpage, 'json vp url')
        return self._extract_from_json_url(json_url, video_id, lang)

    def _extract_from_json_url(self, json_url, video_id, lang):
@@ -0,0 +1,106 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_parse_qs,
+    ExtractorError,
+    int_or_none,
+    unified_strdate,
+)
+
+
+class BiliBiliIE(InfoExtractor):
+    _VALID_URL = r'http://www\.bilibili\.tv/video/av(?P<id>[0-9]+)/'
+
+    _TEST = {
+        'url': 'http://www.bilibili.tv/video/av1074402/',
+        'md5': '2c301e4dab317596e837c3e7633e7d86',
+        'info_dict': {
+            'id': '1074402',
+            'ext': 'flv',
+            'title': '【金坷垃】金泡沫',
+            'duration': 308,
+            'upload_date': '20140420',
+            'thumbnail': 're:^https?://.+\.jpg',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        video_code = self._search_regex(
+            r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
+
+        title = self._html_search_meta(
+            'media:title', video_code, 'title', fatal=True)
+        duration_str = self._html_search_meta(
+            'duration', video_code, 'duration')
+        if duration_str is None:
+            duration = None
+        else:
+            duration_mobj = re.match(
+                r'^T(?:(?P<hours>[0-9]+)H)?(?P<minutes>[0-9]+)M(?P<seconds>[0-9]+)S$',
+                duration_str)
+            duration = (
+                int_or_none(duration_mobj.group('hours'), default=0) * 3600 +
+                int(duration_mobj.group('minutes')) * 60 +
+                int(duration_mobj.group('seconds')))
+        upload_date = unified_strdate(self._html_search_meta(
+            'uploadDate', video_code, fatal=False))
+        thumbnail = self._html_search_meta(
+            'thumbnailUrl', video_code, 'thumbnail', fatal=False)
+
+        player_params = compat_parse_qs(self._html_search_regex(
+            r'<iframe .*?class="player" src="https://secure.bilibili.tv/secure,([^"]+)"',
+            webpage, 'player params'))
+
+        if 'cid' in player_params:
+            cid = player_params['cid'][0]
+
+            lq_doc = self._download_xml(
+                'http://interface.bilibili.cn/v_cdn_play?cid=%s' % cid,
+                video_id,
+                note='Downloading LQ video info'
+            )
+            lq_durl = lq_doc.find('.//durl')
+            formats = [{
+                'format_id': 'lq',
+                'quality': 1,
+                'url': lq_durl.find('./url').text,
+                'filesize': int_or_none(
+                    lq_durl.find('./size'), get_attr='text'),
+            }]
+
+            hq_doc = self._download_xml(
+                'http://interface.bilibili.cn/playurl?cid=%s' % cid,
+                video_id,
+                note='Downloading HQ video info',
+                fatal=False,
+            )
+            if hq_doc is not False:
+                hq_durl = hq_doc.find('.//durl')
+                formats.append({
+                    'format_id': 'hq',
+                    'quality': 2,
+                    'ext': 'flv',
+                    'url': hq_durl.find('./url').text,
+                    'filesize': int_or_none(
+                        hq_durl.find('./size'), get_attr='text'),
+                })
+        else:
+            raise ExtractorError('Unsupported player parameters: %r' % (player_params,))
+
+        self._sort_formats(formats)
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'duration': duration,
+            'upload_date': upload_date,
+            'thumbnail': thumbnail,
+        }
@@ -4,39 +4,72 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)


 class BRIE(InfoExtractor):
-    IE_DESC = "Bayerischer Rundfunk Mediathek"
-    _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-/]+/)?(?P<id>[a-z0-9\-]+)\.html$"
-    _BASE_URL = "http://www.br.de"
+    IE_DESC = 'Bayerischer Rundfunk Mediathek'
+    _VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-]+/)+(?P<id>[a-z0-9\-]+)\.html'
+    _BASE_URL = 'http://www.br.de'

    _TESTS = [
        {
-            "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
-            "md5": "c4f83cf0f023ba5875aba0bf46860df2",
-            "info_dict": {
-                "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
-                "ext": "mp4",
-                "title": "Feiern und Verzichten",
-                "description": "Anselm Grün: Feiern und Verzichten",
-                "uploader": "BR/Birgit Baier",
-                "upload_date": "20140301"
+            'url': 'http://www.br.de/mediathek/video/anselm-gruen-114.html',
+            'md5': 'c4f83cf0f023ba5875aba0bf46860df2',
+            'info_dict': {
+                'id': '2c8d81c5-6fb7-4a74-88d4-e768e5856532',
+                'ext': 'mp4',
+                'title': 'Feiern und Verzichten',
+                'description': 'Anselm Grün: Feiern und Verzichten',
+                'uploader': 'BR/Birgit Baier',
+                'upload_date': '20140301',
            }
        },
        {
-            "url": "http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html",
-            "md5": "ab451b09d861dbed7d7cc9ab0be19ebe",
-            "info_dict": {
-                "id": "2c060e69-3a27-4e13-b0f0-668fac17d812",
-                "ext": "mp4",
-                "title": "Über den Pass",
-                "description": "Die Eroberung der Alpen: Über den Pass",
-                "uploader": None,
-                "upload_date": None
+            'url': 'http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html',
+            'md5': 'ab451b09d861dbed7d7cc9ab0be19ebe',
+            'info_dict': {
+                'id': '2c060e69-3a27-4e13-b0f0-668fac17d812',
+                'ext': 'mp4',
+                'title': 'Über den Pass',
+                'description': 'Die Eroberung der Alpen: Über den Pass',
            }
-        }
+        },
+        {
+            'url': 'http://www.br.de/nachrichten/schaeuble-haushaltsentwurf-bundestag-100.html',
+            'md5': '3db0df1a9a9cd9fa0c70e6ea8aa8e820',
+            'info_dict': {
+                'id': 'c6aae3de-2cf9-43f2-957f-f17fef9afaab',
+                'ext': 'aac',
+                'title': '"Keine neuen Schulden im nächsten Jahr"',
+                'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
+            }
+        },
+        {
+            'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html',
+            'md5': 'dbab0aef2e047060ea7a21fc1ce1078a',
+            'info_dict': {
+                'id': '6ba73750-d405-45d3-861d-1ce8c524e059',
+                'ext': 'mp4',
+                'title': 'Umweltbewusster Häuslebauer',
+                'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
+            }
+        },
+        {
+            'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html',
+            'md5': '23bca295f1650d698f94fc570977dae3',
+            'info_dict': {
+                'id': 'd982c9ce-8648-4753-b358-98abb8aec43d',
+                'ext': 'mp4',
+                'title': 'Folge 1 - Metaphysik',
+                'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
+                'uploader': 'Eva Maria Steimle',
+                'upload_date': '20140117',
+            }
+        },
    ]

    def _real_extract(self, url):
@@ -44,56 +77,63 @@ class BRIE(InfoExtractor):
        display_id = mobj.group('id')
        page = self._download_webpage(url, display_id)
        xml_url = self._search_regex(
-            r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
+            r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
        xml = self._download_xml(self._BASE_URL + xml_url, None)

-        videos = []
-        for xml_video in xml.findall("video"):
-            video = {
-                "id": xml_video.get("externalId"),
-                "title": xml_video.find("title").text,
-                "formats": self._extract_formats(xml_video.find("assets")),
-                "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
-                "description": " ".join(xml_video.find("shareTitle").text.splitlines()),
-                "webpage_url": xml_video.find("permalink").text
-            }
-            if xml_video.find("author").text:
-                video["uploader"] = xml_video.find("author").text
-            if xml_video.find("broadcastDate").text:
-                video["upload_date"] =  "".join(reversed(xml_video.find("broadcastDate").text.split(".")))
-            videos.append(video)
+        medias = []

-        if len(videos) > 1:
+        for xml_media in xml.findall('video') + xml.findall('audio'):
+            media = {
+                'id': xml_media.get('externalId'),
+                'title': xml_media.find('title').text,
+                'formats': self._extract_formats(xml_media.find('assets')),
+                'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
+                'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),
+                'webpage_url': xml_media.find('permalink').text
+            }
+            if xml_media.find('author').text:
+                media['uploader'] = xml_media.find('author').text
+            if xml_media.find('broadcastDate').text:
+                media['upload_date'] = ''.join(reversed(xml_media.find('broadcastDate').text.split('.')))
+            medias.append(media)
+
+        if len(medias) > 1:
            self._downloader.report_warning(
-                'found multiple videos; please '
+                'found multiple medias; please '
                'report this with the video URL to http://yt-dl.org/bug')
-        if not videos:
-            raise ExtractorError('No video entries found')
-        return videos[0]
+        if not medias:
+            raise ExtractorError('No media entries found')
+        return medias[0]

    def _extract_formats(self, assets):
+
+        def text_or_none(asset, tag):
+            elem = asset.find(tag)
+            return None if elem is None else elem.text
+
        formats = [{
-            "url": asset.find("downloadUrl").text,
-            "ext": asset.find("mediaType").text,
-            "format_id": asset.get("type"),
-            "width": int(asset.find("frameWidth").text),
-            "height": int(asset.find("frameHeight").text),
-            "tbr": int(asset.find("bitrateVideo").text),
-            "abr": int(asset.find("bitrateAudio").text),
-            "vcodec": asset.find("codecVideo").text,
-            "container": asset.find("mediaType").text,
-            "filesize": int(asset.find("size").text),
-        } for asset in assets.findall("asset")
-            if asset.find("downloadUrl") is not None]
+            'url': text_or_none(asset, 'downloadUrl'),
+            'ext': text_or_none(asset, 'mediaType'),
+            'format_id': asset.get('type'),
+            'width': int_or_none(text_or_none(asset, 'frameWidth')),
+            'height': int_or_none(text_or_none(asset, 'frameHeight')),
+            'tbr': int_or_none(text_or_none(asset, 'bitrateVideo')),
+            'abr': int_or_none(text_or_none(asset, 'bitrateAudio')),
+            'vcodec': text_or_none(asset, 'codecVideo'),
+            'acodec': text_or_none(asset, 'codecAudio'),
+            'container': text_or_none(asset, 'mediaType'),
+            'filesize': int_or_none(text_or_none(asset, 'size')),
+        } for asset in assets.findall('asset')
+            if asset.find('downloadUrl') is not None]

        self._sort_formats(formats)
        return formats

    def _extract_thumbnails(self, variants):
        thumbnails = [{
-            "url": self._BASE_URL + variant.find("url").text,
-            "width": int(variant.find("width").text),
-            "height": int(variant.find("height").text),
-        } for variant in variants.findall("variant")]
-        thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True)
+            'url': self._BASE_URL + variant.find('url').text,
+            'width': int_or_none(variant.find('width').text),
+            'height': int_or_none(variant.find('height').text),
+        } for variant in variants.findall('variant')]
+        thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
        return thumbnails
@@ -140,7 +140,11 @@ class BrightcoveIE(InfoExtractor):

        url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
        if url_m:
-            return [unescapeHTML(url_m.group(1))]
+            url = unescapeHTML(url_m.group(1))
+            # Some sites don't add it, we can't download with this url, for example:
+            # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
+            if 'playerKey' in url:
+                return [url]

        matches = re.findall(
            r'''(?sx)<object
@@ -4,9 +4,7 @@ import json
 import re

 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)
+from ..utils import ExtractorError


 class BYUtvIE(InfoExtractor):
@@ -16,7 +14,7 @@ class BYUtvIE(InfoExtractor):
        'info_dict': {
            'id': 'granite-flats-talking',
            'ext': 'mp4',
-            'description': 'md5:1a7ae3e153359b7cc355ef3963441e5f',
+            'description': 'md5:4e9a7ce60f209a33eca0ac65b4918e1c',
            'title': 'Talking',
            'thumbnail': 're:^https?://.*promo.*'
        },
@@ -1,4 +1,6 @@
 # encoding: utf-8
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -8,46 +10,56 @@ from ..utils import unified_strdate
 class CanalplusIE(InfoExtractor):
    _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
-    IE_NAME = u'canalplus.fr'
+    IE_NAME = 'canalplus.fr'

    _TEST = {
-        u'url': u'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
-        u'file': u'922470.flv',
-        u'info_dict': {
-            u'title': u'Zapping - 26/08/13',
-            u'description': u'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
-            u'upload_date': u'20130826',
-        },
-        u'params': {
-            u'skip_download': True,
+        'url': 'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
+        'md5': '60c29434a416a83c15dae2587d47027d',
+        'info_dict': {
+            'id': '922470',
+            'ext': 'flv',
+            'title': 'Zapping - 26/08/13',
+            'description': 'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
+            'upload_date': '20130826',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.groupdict().get('id')
+        video_id = mobj.group('id')
+
        if video_id is None:
            webpage = self._download_webpage(url, mobj.group('path'))
-            video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, u'video id')
+            video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, 'video id')
+
        info_url = self._VIDEO_INFO_TEMPLATE % video_id
-        doc = self._download_xml(info_url,video_id, 
-                                           u'Downloading video info')
+        doc = self._download_xml(info_url, video_id, 'Downloading video XML')

-        self.report_extraction(video_id)
        video_info = [video for video in doc if video.find('ID').text == video_id][0]
-        infos = video_info.find('INFOS')
        media = video_info.find('MEDIA')
-        formats = [media.find('VIDEOS/%s' % format)
-            for format in ['BAS_DEBIT', 'HAUT_DEBIT', 'HD']]
-        video_url = [format.text for format in formats if format is not None][-1]
+        infos = video_info.find('INFOS')

-        return {'id': video_id,
-                'title': u'%s - %s' % (infos.find('TITRAGE/TITRE').text,
-                                       infos.find('TITRAGE/SOUS_TITRE').text),
-                'url': video_url,
-                'ext': 'flv',
-                'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
-                'thumbnail': media.find('IMAGES/GRAND').text,
-                'description': infos.find('DESCRIPTION').text,
-                'view_count': int(infos.find('NB_VUES').text),
-                }
+        preferences = ['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS']
+
+        formats = [
+            {
+                'url': fmt.text + '?hdcore=2.11.3' if fmt.tag == 'HDS' else fmt.text,
+                'format_id': fmt.tag,
+                'ext': 'mp4' if fmt.tag == 'HLS' else 'flv',
+                'preference': preferences.index(fmt.tag) if fmt.tag in preferences else -1,
+            } for fmt in media.find('VIDEOS') if fmt.text
+        ]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': '%s - %s' % (infos.find('TITRAGE/TITRE').text,
+                                  infos.find('TITRAGE/SOUS_TITRE').text),
+            'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
+            'thumbnail': media.find('IMAGES/GRAND').text,
+            'description': infos.find('DESCRIPTION').text,
+            'view_count': int(infos.find('NB_VUES').text),
+            'like_count': int(infos.find('NB_LIKES').text),
+            'comment_count': int(infos.find('NB_COMMENTS').text),
+            'formats': formats,
+        }
@@ -0,0 +1,58 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    qualities,
+)
+
+
+class ClubicIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?clubic\.com/video/[^/]+/video.*-(?P<id>[0-9]+)\.html'
+
+    _TEST = {
+        'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html',
+        'md5': '1592b694ba586036efac1776b0b43cd3',
+        'info_dict': {
+            'id': '448474',
+            'ext': 'mp4',
+            'title': 'Clubic Week 2.0 : le FBI se lance dans la photo d\u0092identité',
+            'description': 're:Gueule de bois chez Nokia. Le constructeur a indiqué cette.*',
+            'thumbnail': 're:^http://img\.clubic\.com/.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
+        player_page = self._download_webpage(player_url, video_id)
+
+        config_json = self._search_regex(
+            r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
+            'configuration')
+        config = json.loads(config_json)
+
+        video_info = config['videoInfo']
+        sources = config['sources']
+        quality_order = qualities(['sd', 'hq'])
+
+        formats = [{
+            'format_id': src['streamQuality'],
+            'url': src['src'],
+            'quality': quality_order(src['streamQuality']),
+        } for src in sources]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_info['title'],
+            'formats': formats,
+            'description': clean_html(video_info.get('description')),
+            'thumbnail': config.get('poster'),
+        }
@@ -33,7 +33,7 @@ class CNETIE(InfoExtractor):

        webpage = self._download_webpage(url, display_id)
        data_json = self._html_search_regex(
-            r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'",
+            r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'",
            webpage, 'data json')
        data = json.loads(data_json)
        vdata = data['video']
@@ -21,7 +21,7 @@ class ComedyCentralIE(MTVServicesInfoExtractor):

    _TEST = {
        'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
-        'md5': '4167875aae411f903b751a21f357f1ee',
+        'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
        'info_dict': {
            'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
            'ext': 'mp4',
@@ -41,9 +41,9 @@ class ComedyCentralShowsIE(InfoExtractor):
    _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
                      |https?://(:www\.)?
                          (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
-                         (full-episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
+                         ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
                          (?P<clip>
-                              (?:(?:guests/[^/]+|videos)/[^/]+/(?P<videotitle>[^/?#]+))
+                              (?:(?:guests/[^/]+|videos|video-playlists|special-editions)/[^/]+/(?P<videotitle>[^/?#]+))
                              |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
                          )|
@@ -251,7 +251,10 @@ class InfoExtractor(object):
            with open(filename, 'wb') as outf:
                outf.write(webpage_bytes)

-        content = webpage_bytes.decode(encoding, 'replace')
+        try:
+            content = webpage_bytes.decode(encoding, 'replace')
+        except LookupError:
+            content = webpage_bytes.decode('utf-8', 'replace')

        if (u'<title>Access to this site is blocked</title>' in content and
                u'Websense' in content[:512]):
@@ -276,9 +279,12 @@ class InfoExtractor(object):

    def _download_xml(self, url_or_request, video_id,
                      note=u'Downloading XML', errnote=u'Unable to download XML',
-                      transform_source=None):
+                      transform_source=None, fatal=True):
        """Return the xml as an xml.etree.ElementTree.Element"""
-        xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
+        xml_string = self._download_webpage(
+            url_or_request, video_id, note, errnote, fatal=fatal)
+        if xml_string is False:
+            return xml_string
        if transform_source:
            xml_string = transform_source(xml_string)
        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
@@ -28,16 +28,18 @@ class CondeNastIE(InfoExtractor):
        'glamour': 'Glamour',
        'wmagazine': 'W Magazine',
        'vanityfair': 'Vanity Fair',
+        'cnevids': 'Condé Nast',
    }

-    _VALID_URL = r'http://(video|www)\.(?P<site>%s)\.com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys())
+    _VALID_URL = r'http://(video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
    IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))

    _TEST = {
        'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
-        'file': '5171b343c2b4c00dd0c1ccb3.mp4',
        'md5': '1921f713ed48aabd715691f774c451f7',
        'info_dict': {
+            'id': '5171b343c2b4c00dd0c1ccb3',
+            'ext': 'mp4',
            'title': '3D Printed Speakers Lit With LED',
            'description': 'Check out these beautiful 3D printed LED speakers.  You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
        }
@@ -55,12 +57,16 @@ class CondeNastIE(InfoExtractor):
        entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
        return self.playlist_result(entries, playlist_title=title)

-    def _extract_video(self, webpage):
-        description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>',
-                                               r'<div class="video-post-content">(.+?)</div>',
-                                               ],
-                                              webpage, 'description',
-                                              fatal=False, flags=re.DOTALL)
+    def _extract_video(self, webpage, url_type):
+        if url_type != 'embed':
+            description = self._html_search_regex(
+                [
+                    r'<div class="cne-video-description">(.+?)</div>',
+                    r'<div class="video-post-content">(.+?)</div>',
+                ],
+                webpage, 'description', fatal=False, flags=re.DOTALL)
+        else:
+            description = None
        params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
                                    'player params', flags=re.DOTALL)
        video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id')
@@ -99,12 +105,12 @@ class CondeNastIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        site = mobj.group('site')
        url_type = mobj.group('type')
-        id = mobj.group('id')
+        item_id = mobj.group('id')

-        self.to_screen(u'Extracting from %s with the Condé Nast extractor' % self._SITES[site])
-        webpage = self._download_webpage(url, id)
+        self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site])
+        webpage = self._download_webpage(url, item_id)

        if url_type == 'series':
            return self._extract_series(url, webpage)
        else:
-            return self._extract_video(webpage)
+            return self._extract_video(webpage, url_type)
@@ -8,12 +8,11 @@ from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
    compat_urllib_request,
    compat_str,
-    get_element_by_id,
    orderedSet,
    str_to_int,
    int_or_none,
-
    ExtractorError,
+    unescapeHTML,
 )

 class DailymotionBaseInfoExtractor(InfoExtractor):
@@ -189,7 +188,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
            webpage = self._download_webpage(request,
                                             id, u'Downloading page %s' % pagenum)

-            video_ids.extend(re.findall(r'data-id="(.+?)"', webpage))
+            video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))

            if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
                break
@@ -201,11 +200,12 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(url, playlist_id)

-        return {'_type': 'playlist',
-                'id': playlist_id,
-                'title': get_element_by_id(u'playlist_name', webpage),
-                'entries': self._extract_entries(playlist_id),
-                }
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': self._og_search_title(webpage),
+            'entries': self._extract_entries(playlist_id),
+        }


 class DailymotionUserIE(DailymotionPlaylistIE):
@@ -217,9 +217,9 @@ class DailymotionUserIE(DailymotionPlaylistIE):
        mobj = re.match(self._VALID_URL, url)
        user = mobj.group('user')
        webpage = self._download_webpage(url, user)
-        full_user = self._html_search_regex(
-            r'<a class="label" href="/%s".*?>(.*?)</' % re.escape(user),
-            webpage, u'user', flags=re.DOTALL)
+        full_user = unescapeHTML(self._html_search_regex(
+            r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
+            webpage, u'user', flags=re.DOTALL))

        return {
            '_type': 'playlist',
@@ -1,4 +1,5 @@
-import os
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -8,18 +9,23 @@ from ..utils import (
    compat_urllib_parse,
 )

+
 class ExtremeTubeIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
-    _TEST = {
-        u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
-        u'file': u'652431.mp4',
-        u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0',
-        u'info_dict': {
-            u"title": u"Music Video 14 british euro brit european cumshots swallow",
-            u"uploader": u"unknown",
-            u"age_limit": 18,
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
+    _TESTS = [{
+        'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
+        'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
+        'info_dict': {
+            'id': '652431',
+            'ext': 'mp4',
+            'title': 'Music Video 14 british euro brit european cumshots swallow',
+            'uploader': 'unknown',
+            'age_limit': 18,
        }
-    }
+    }, {
+        'url': 'http://www.extremetube.com/gay/video/abcde-1234',
+        'only_matching': True,
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -30,11 +36,14 @@ class ExtremeTubeIE(InfoExtractor):
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

-        video_title = self._html_search_regex(r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, u'title')
-        uploader = self._html_search_regex(r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False)
-        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, u'video_url'))
+        video_title = self._html_search_regex(
+            r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, 'title')
+        uploader = self._html_search_regex(
+            r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader',
+            fatal=False)
+        video_url = compat_urllib_parse.unquote(self._html_search_regex(
+            r'video_url=(.+?)&amp;', webpage, 'video_url'))
        path = compat_urllib_parse_urlparse(video_url).path
-        extension = os.path.splitext(path)[1][1:]
        format = path.split('/')[5].split('_')[:2]
        format = "-".join(format)

@@ -43,7 +52,6 @@ class ExtremeTubeIE(InfoExtractor):
            'title': video_title,
            'uploader': uploader,
            'url': video_url,
-            'ext': extension,
            'format': format,
            'format_id': format,
            'age_limit': 18,
@@ -76,9 +76,8 @@ class FacebookIE(InfoExtractor):

            check_form = {
                'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
-                'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
+                'h': self._search_regex(r'name="h" value="(\w*?)"', login_results, 'h'),
                'name_action_selected': 'dont_save',
-                'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'),
            }
            check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
            check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
@@ -6,7 +6,6 @@ from .common import InfoExtractor


 class FirstpostIE(InfoExtractor):
-    IE_NAME = 'Firstpost.com'
    _VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'

    _TEST = {
@@ -16,7 +15,6 @@ class FirstpostIE(InfoExtractor):
            'id': '1025403',
            'ext': 'mp4',
            'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
-            'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.',
        }
    }

@@ -24,15 +22,26 @@ class FirstpostIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

-        webpage = self._download_webpage(url, video_id)
-        video_url = self._html_search_regex(
-            r'<div.*?name="div_video".*?flashvars="([^"]+)">',
-            webpage, 'video URL')
+        data = self._download_xml(
+            'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id,
+            'Downloading video XML')
+
+        item = data.find('./playlist/item')
+        thumbnail = item.find('./image').text
+        title = item.find('./title').text
+
+        formats = [
+            {
+                'url': details.find('./file').text,
+                'format_id': details.find('./label').text.strip(),
+                'width': int(details.find('./width').text.strip()),
+                'height': int(details.find('./height').text.strip()),
+            } for details in item.findall('./source/file_details') if details.find('./file').text
+        ]

        return {
            'id': video_id,
-            'url': video_url,
-            'title': self._og_search_title(webpage),
-            'description': self._og_search_description(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
        }
@@ -5,6 +5,8 @@ import re
 from .common import InfoExtractor
 from ..utils import (
    compat_str,
+    compat_urllib_parse,
+    ExtractorError,
 )


@@ -16,16 +18,28 @@ class FiveMinIE(InfoExtractor):
        (?P<id>\d+)
        '''

-    _TEST = {
-        # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
-        'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
-        'md5': '4f7b0b79bf1a470e5004f7112385941d',
-        'info_dict': {
-            'id': '518013791',
-            'ext': 'mp4',
-            'title': 'iPad Mini with Retina Display Review',
+    _TESTS = [
+        {
+            # From http://www.engadget.com/2013/11/15/ipad-mini-retina-display-review/
+            'url': 'http://pshared.5min.com/Scripts/PlayerSeed.js?sid=281&width=560&height=345&playList=518013791',
+            'md5': '4f7b0b79bf1a470e5004f7112385941d',
+            'info_dict': {
+                'id': '518013791',
+                'ext': 'mp4',
+                'title': 'iPad Mini with Retina Display Review',
+            },
        },
-    }
+        {
+            # From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247
+            'url': '5min:518086247',
+            'md5': 'e539a9dd682c288ef5a498898009f69e',
+            'info_dict': {
+                'id': '518086247',
+                'ext': 'mp4',
+                'title': 'How to Make a Next-Level Fruit Salad',
+            },
+        },
+    ]

    @classmethod
    def _build_result(cls, video_id):
@@ -34,10 +48,28 @@ class FiveMinIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        info = self._download_json(
-            'https://syn.5min.com/handlers/SenseHandler.ashx?func=GetResults&'
-            'playlist=%s&url=https' % video_id,
-            video_id)['binding'][0]
+        embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
+        embed_page = self._download_webpage(embed_url, video_id,
+            'Downloading embed page')
+        sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
+        query = compat_urllib_parse.urlencode({
+            'func': 'GetResults',
+            'playlist': video_id,
+            'sid': sid,
+            'isPlayerSeed': 'true',
+            'url': embed_url,
+        })
+        response = self._download_json(
+            'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
+            video_id)
+        if not response['success']:
+            err_msg = response['errorMessage']
+            if err_msg == 'ErrorVideoUserNotGeo':
+                msg = 'Video not available from your location'
+            else:
+                msg = 'Aol said: %s' % err_msg
+            raise ExtractorError(msg, expected=True, video_id=video_id)
+        info = response['binding'][0]

        second_id = compat_str(int(video_id[:-2]) + 1)
        formats = []
@@ -35,9 +35,10 @@ class GenericIE(InfoExtractor):
    _TESTS = [
        {
            'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
-            'file': '13601338388002.mp4',
-            'md5': '6e15c93721d7ec9e9ca3fdbf07982cfd',
+            'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
            'info_dict': {
+                'id': '13601338388002',
+                'ext': 'mp4',
                'uploader': 'www.hodiho.fr',
                'title': 'R\u00e9gis plante sa Jeep',
            }
@@ -46,8 +47,9 @@ class GenericIE(InfoExtractor):
        {
            'add_ie': ['Bandcamp'],
            'url': 'http://bronyrock.com/track/the-pony-mash',
-            'file': '3235767654.mp3',
            'info_dict': {
+                'id': '3235767654',
+                'ext': 'mp3',
                'title': 'The Pony Mash',
                'uploader': 'M_Pallante',
            },
@@ -73,9 +75,10 @@ class GenericIE(InfoExtractor):
        {
            # https://github.com/rg3/youtube-dl/issues/2253
            'url': 'http://bcove.me/i6nfkrc3',
-            'file': '3101154703001.mp4',
            'md5': '0ba9446db037002366bab3b3eb30c88c',
            'info_dict': {
+                'id': '3101154703001',
+                'ext': 'mp4',
                'title': 'Still no power',
                'uploader': 'thestar.com',
                'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
@@ -236,6 +239,28 @@ class GenericIE(InfoExtractor):
                'uploader_id': 'rbctv_2012_4',
            },
        },
+        # Condé Nast embed
+        {
+            'url': 'http://www.wired.com/2014/04/honda-asimo/',
+            'md5': 'ba0dfe966fa007657bd1443ee672db0f',
+            'info_dict': {
+                'id': '53501be369702d3275860000',
+                'ext': 'mp4',
+                'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
+            }
+        },
+        # Dailymotion embed
+        {
+            'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
+            'md5': '441aeeb82eb72c422c7f14ec533999cd',
+            'info_dict': {
+                'id': 'k2mm4bCdJ6CQ2i7c8o2',
+                'ext': 'mp4',
+                'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
+                'uploader': 'Spi0n',
+            },
+            'add_ie': ['Dailymotion'],
+        }
    ]

    def report_download_webpage(self, video_id):
@@ -320,6 +345,15 @@ class GenericIE(InfoExtractor):
        }

    def _real_extract(self, url):
+        if url.startswith('//'):
+            return {
+                '_type': 'url',
+                'url': (
+                    'http:'
+                    if self._downloader.params.get('prefer_insecure', False)
+                    else 'https:') + url,
+            }
+
        parsed_url = compat_urlparse.urlparse(url)
        if not parsed_url.scheme:
            default_search = self._downloader.params.get('default_search')
@@ -456,7 +490,7 @@ class GenericIE(InfoExtractor):
        matches = re.findall(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
        if matches:
-            urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion')
+            urlrs = [self.url_result(unescapeHTML(tuppl[1]))
                     for tuppl in matches]
            return self.playlist_result(
                urlrs, playlist_id=video_id, playlist_title=video_title)
@@ -482,6 +516,22 @@ class GenericIE(InfoExtractor):
        if mobj:
            return self.url_result(mobj.group(1), 'BlipTV')

+        # Look for embedded condenast player
+        matches = re.findall(
+            r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
+            webpage)
+        if matches:
+            return {
+                '_type': 'playlist',
+                'entries': [{
+                    '_type': 'url',
+                    'ie_key': 'CondeNast',
+                    'url': ma,
+                } for ma in matches],
+                'title': video_title,
+                'id': video_id,
+            }
+
        # Look for Bandcamp pages with custom domain
        mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
        if mobj is not None:
@@ -502,7 +552,7 @@ class GenericIE(InfoExtractor):
            return OoyalaIE._build_url_result(mobj.group('ec'))

        # Look for Aparat videos
-        mobj = re.search(r'<iframe src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
+        mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
        if mobj is not None:
            return self.url_result(mobj.group(1), 'Aparat')

@@ -590,7 +640,13 @@ class GenericIE(InfoExtractor):
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if mobj is None:
            # Look for gorilla-vid style embedding
-            mobj = re.search(r'(?s)(?:jw_plugins|JWPlayerOptions).*?file\s*:\s*["\'](.*?)["\']', webpage)
+            mobj = re.search(r'''(?sx)
+                (?:
+                    jw_plugins|
+                    JWPlayerOptions|
+                    jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
+                )
+                .*?file\s*:\s*["\'](.*?)["\']''', webpage)
        if mobj is None:
            # Broaden the search a little bit
            mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
@@ -106,7 +106,7 @@ class OneUPIE(IGNIE):

    _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'

-    _TEST = {
+    _TESTS = [{
        'url': 'http://gamevideos.1up.com/video/id/34976',
        'md5': '68a54ce4ebc772e4b71e3123d413163d',
        'info_dict': {
@@ -115,10 +115,7 @@ class OneUPIE(IGNIE):
            'title': 'Sniper Elite V2 - Trailer',
            'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf',
        }
-    }
-
-    # Override IGN tests
-    _TESTS = []
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -11,16 +11,15 @@ from ..utils import (

 class InfoQIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'
+
    _TEST = {
-        "name": "InfoQ",
-        "url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
-        "file": "12-jan-pythonthings.mp4",
-        "info_dict": {
-            "description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",
-            "title": "A Few of My Favorite [Python] Things",
-        },
-        "params": {
-            "skip_download": True,
+        'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
+        'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
+        'info_dict': {
+            'id': '12-jan-pythonthings',
+            'ext': 'mp4',
+            'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
+            'title': 'A Few of My Favorite [Python] Things',
        },
    }

@@ -30,26 +29,39 @@ class InfoQIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)

+        video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
+        video_description = self._html_search_meta('description', webpage, 'description')
+
+        # The server URL is hardcoded
+        video_url = 'rtmpe://video.infoq.com/cfx/st/'
+
        # Extract video URL
-        encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id')
+        encoded_id = self._search_regex(
+            r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id')
        real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
-        video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id
+        playpath = 'mp4:' + real_id

-        # Extract title
-        video_title = self._search_regex(r'contentTitle = "(.*?)";',
-            webpage, 'title')
-
-        # Extract description
-        video_description = self._html_search_regex(r'<meta name="description" content="(.*)"(?:\s*/)?>',
-            webpage, 'description', fatal=False)
-
-        video_filename = video_url.split('/')[-1]
+        video_filename = playpath.split('/')[-1]
        video_id, extension = video_filename.split('.')

+        http_base = self._search_regex(
+            r'EXPRESSINSTALL_SWF\s*=\s*"(https?://[^/"]+/)', webpage,
+            'HTTP base URL')
+
+        formats = [{
+            'format_id': 'rtmp',
+            'url': video_url,
+            'ext': extension,
+            'play_path': playpath,
+        }, {
+            'format_id': 'http',
+            'url': http_base + real_id,
+        }]
+        self._sort_formats(formats)
+
        return {
            'id': video_id,
-            'url': video_url,
            'title': video_title,
-            'ext': extension,  # Extension is always(?) mp4, but seems to be flv
            'description': video_description,
+            'formats': formats,
        }
@@ -14,7 +14,7 @@ class JukeboxIE(InfoExtractor):
    _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
    _TEST = {
        'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
-        'md5': '5dc6477e74b1e37042ac5acedd8413e5',
+        'md5': '1574e9b4d6438446d5b7dbcdf2786276',
        'info_dict': {
            'id': 'r303r',
            'ext': 'flv',
@@ -1,15 +1,18 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)


 class MDRIE(InfoExtractor):
-    _VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*'
+    _VALID_URL = r'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)'
    
    # No tests, MDR regularily deletes its videos
+    _TEST = {
+        'url': 'http://www.mdr.de/fakt/video189002.html',
+        'only_matching': True,
+    }

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
@@ -19,9 +22,9 @@ class MDRIE(InfoExtractor):
        # determine title and media streams from webpage
        html = self._download_webpage(url, video_id)

-        title = self._html_search_regex(r'<h2>(.*?)</h2>', html, u'title')
+        title = self._html_search_regex(r'<h[12]>(.*?)</h[12]>', html, 'title')
        xmlurl = self._search_regex(
-            r'(/mediathek/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, u'XML URL')
+            r'dataURL:\'(/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, 'XML URL')

        doc = self._download_xml(domain + xmlurl, video_id)
        formats = []
@@ -41,7 +44,7 @@ class MDRIE(InfoExtractor):
            if vbr_el is None:
                format.update({
                    'vcodec': 'none',
-                    'format_id': u'%s-%d' % (media_type, abr),
+                    'format_id': '%s-%d' % (media_type, abr),
                })
            else:
                vbr = int(vbr_el.text) // 1000
@@ -49,12 +52,9 @@ class MDRIE(InfoExtractor):
                    'vbr': vbr,
                    'width': int(a.find('frameWidth').text),
                    'height': int(a.find('frameHeight').text),
-                    'format_id': u'%s-%d' % (media_type, vbr),
+                    'format_id': '%s-%d' % (media_type, vbr),
                })
            formats.append(format)
-        if not formats:
-            raise ExtractorError(u'Could not find any valid formats')
-
        self._sort_formats(formats)

        return {
@@ -8,7 +8,7 @@ from .common import InfoExtractor

 class MorningstarIE(InfoExtractor):
    IE_DESC = 'morningstar.com'
-    _VALID_URL = r'https?://(?:www\.)?morningstar\.com/cover/videocenter\.aspx\?id=(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
        'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class MoviezineIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.moviezine\.se/video/(?P<id>[^?#]+)'
+
+    _TEST = {
+        'url': 'http://www.moviezine.se/video/205866',
+        'info_dict': {
+            'id': '205866',
+            'ext': 'mp4',
+            'title': 'Oculus - Trailer 1',
+            'description': 'md5:40cc6790fc81d931850ca9249b40e8a4',
+            'thumbnail': 're:http://.*\.jpg',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player')
+
+        formats =[{
+            'format_id': 'sd',
+            'url': self._html_search_regex(r'file: "(.+?)",', jsplayer, 'file'),
+            'quality': 0,
+            'ext': 'mp4',
+        }]
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._search_regex(r'title: "(.+?)",', jsplayer, 'title'),
+            'thumbnail': self._search_regex(r'image: "(.+?)",', jsplayer, 'image'),
+            'formats': formats,
+            'description': self._og_search_description(webpage),
+        }
@@ -4,9 +4,7 @@ import json
 import re

 from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-)
+from ..utils import int_or_none


 class MporaIE(InfoExtractor):
@@ -20,7 +18,7 @@ class MporaIE(InfoExtractor):
        'info_dict': {
            'title': 'Katy Curd -  Winter in the Forest',
            'duration': 416,
-            'uploader': 'petenewman',
+            'uploader': 'Peter Newman Media',
        },
    }

@@ -1,15 +1,22 @@
 from __future__ import unicode_literals

 import re
+import json

 from .common import InfoExtractor
+from ..utils import str_to_int


 class NineGagIE(InfoExtractor):
    IE_NAME = '9gag'
-    _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'
+    _VALID_URL = r'''(?x)^https?://(?:www\.)?9gag\.tv/
+        (?:
+            v/(?P<numid>[0-9]+)|
+            p/(?P<id>[a-zA-Z0-9]+)/(?P<display_id>[^?#/]+)
+        )
+    '''

-    _TEST = {
+    _TESTS = [{
        "url": "http://9gag.tv/v/1912",
        "info_dict": {
            "id": "1912",
@@ -20,34 +27,42 @@ class NineGagIE(InfoExtractor):
            "thumbnail": "re:^https?://",
        },
        'add_ie': ['Youtube']
-    }
+    },
+    {
+        'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
+        'info_dict': {
+            'id': 'KklwM',
+            'ext': 'mp4',
+            'display_id': 'alternate-banned-opening-scene-of-gravity',
+            "description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.",
+            'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie",
+        },
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = mobj.group('numid') or mobj.group('id')
+        display_id = mobj.group('display_id') or video_id

-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, display_id)

-        youtube_id = self._html_search_regex(
-            r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"',
-            webpage, 'video ID')
-        description = self._html_search_regex(
-            r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage,
-            'description', fatal=False)
-        view_count_str = self._html_search_regex(
-            r'<p><b>([0-9][0-9,]*)</b> views</p>', webpage, 'view count',
-            fatal=False)
-        view_count = (
-            None if view_count_str is None
-            else int(view_count_str.replace(',', '')))
+        post_view = json.loads(self._html_search_regex(
+            r'var postView = new app\.PostView\({\s*post:\s*({.+?}),', webpage, 'post view'))
+
+        youtube_id = post_view['videoExternalId']
+        title = post_view['title']
+        description = post_view['description']
+        view_count = str_to_int(post_view['externalView'])
+        thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')

        return {
            '_type': 'url_transparent',
            'url': youtube_id,
            'ie_key': 'Youtube',
            'id': video_id,
-            'title': self._og_search_title(webpage),
+            'display_id': display_id,
+            'title': title,
            'description': description,
            'view_count': view_count,
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'thumbnail': thumbnail,
        }
@@ -0,0 +1,105 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    unified_strdate,
+    compat_str,
+)
+
+
+class NocoIE(InfoExtractor):
+    _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
+        'md5': '0a993f0058ddbcd902630b2047ef710e',
+        'info_dict': {
+            'id': '11538',
+            'ext': 'mp4',
+            'title': 'Ami Ami Idol - Hello! France',
+            'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
+            'upload_date': '20140412',
+            'uploader': 'Nolife',
+            'uploader_id': 'NOL',
+            'duration': 2851.2,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        medias = self._download_json(
+            'http://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON')
+
+        formats = []
+
+        for fmt in medias['fr']['video_list']['default']['quality_list']:
+            format_id = fmt['quality_key']
+
+            file = self._download_json(
+                'http://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id),
+                video_id, 'Downloading %s video JSON' % format_id)
+
+            file_url = file['file']
+            if not file_url:
+                continue
+
+            if file_url == 'forbidden':
+                raise ExtractorError(
+                    '%s returned error: %s - %s' % (
+                        self.IE_NAME, file['popmessage']['title'], file['popmessage']['message']),
+                    expected=True)
+
+            formats.append({
+                'url': file_url,
+                'format_id': format_id,
+                'width': fmt['res_width'],
+                'height': fmt['res_lines'],
+                'abr': fmt['audiobitrate'],
+                'vbr': fmt['videobitrate'],
+                'filesize': fmt['filesize'],
+                'format_note': fmt['quality_name'],
+                'preference': fmt['priority'],
+            })
+
+        self._sort_formats(formats)
+
+        show = self._download_json(
+            'http://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0]
+
+        upload_date = unified_strdate(show['indexed'])
+        uploader = show['partner_name']
+        uploader_id = show['partner_key']
+        duration = show['duration_ms'] / 1000.0
+        thumbnail = show['screenshot']
+
+        episode = show.get('show_TT') or show.get('show_OT')
+        family = show.get('family_TT') or show.get('family_OT')
+        episode_number = show.get('episode_number')
+
+        title = ''
+        if family:
+            title += family
+        if episode_number:
+            title += ' #' + compat_str(episode_number)
+        if episode:
+            title += ' - ' + episode
+
+        description = show.get('show_resume') or show.get('family_resume')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'duration': duration,
+            'formats': formats,
+        }
@@ -0,0 +1,67 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class NRKIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})'
+
+    _TESTS = [
+        {
+            'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
+            'md5': '12618eef328c9a35c1b47d5583d9c30d',
+            'info_dict': {
+                'id': '150533',
+                'ext': 'flv',
+                'title': 'Dompap og andre fugler i Piip-Show',
+                'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f'
+            }
+        },
+        {
+            'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
+            'md5': '390b2ce15c0d6aa376ef5059ac9f865e',
+            'info_dict': {
+                'id': '154915',
+                'ext': 'flv',
+                'title': 'Slik høres internett ut når du er blind',
+                'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
+            }
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        page = self._download_webpage(url, video_id)
+
+        video_id = self._html_search_regex(r'<div class="nrk-video" data-nrk-id="(\d+)">', page, 'video id')
+
+        data = self._download_json(
+            'http://v7.psapi.nrk.no/mediaelement/%s' % video_id, video_id, 'Downloading media JSON')
+
+        if data['usageRights']['isGeoBlocked']:
+            raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected=True)
+
+        video_url = data['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124'
+
+        images = data.get('images')
+        if images:
+            thumbnails = images['webImages']
+            thumbnails.sort(key=lambda image: image['pixelWidth'])
+            thumbnail = thumbnails[-1]['imageUrl']
+        else:
+            thumbnail = None
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'flv',
+            'title': data['title'],
+            'description': data['description'],
+            'thumbnail': thumbnail,
+        }
@@ -59,11 +59,11 @@ class NTVIE(InfoExtractor):
        {
            'url': 'http://www.ntv.ru/kino/Koma_film',
            'info_dict': {
-                'id': '750783',
+                'id': '758100',
                'ext': 'flv',
-                'title': 'Остросюжетный фильм «Кома»  4 апреля вечером на НТВ',
-                'description': 'Остросюжетный фильм «Кома»  4 апреля вечером на НТВ',
-                'duration': 28,
+                'title': 'Остросюжетный фильм «Кома»',
+                'description': 'Остросюжетный фильм «Кома»',
+                'duration': 5592,
            },
            'params': {
                    # rtmp download
@@ -6,22 +6,36 @@ import re
 from .common import InfoExtractor
 from ..utils import int_or_none

-
 class PodomaticIE(InfoExtractor):
    IE_NAME = 'podomatic'
    _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'

-    _TEST = {
-        "url": "http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
-        "file": "2009-01-02T16_03_35-08_00.mp3",
-        "md5": "84bb855fcf3429e6bf72460e1eed782d",
-        "info_dict": {
-            "uploader": "Science Teaching Tips",
-            "uploader_id": "scienceteachingtips",
-            "title": "64.  When the Moon Hits Your Eye",
-            "duration": 446,
-        }
-    }
+    _TESTS = [
+        {
+            'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00',
+            'md5': '84bb855fcf3429e6bf72460e1eed782d',
+            'info_dict': {
+                'id': '2009-01-02T16_03_35-08_00',
+                'ext': 'mp3',
+                'uploader': 'Science Teaching Tips',
+                'uploader_id': 'scienceteachingtips',
+                'title': '64.  When the Moon Hits Your Eye',
+                'duration': 446,
+            }
+        },
+        {
+            'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00',
+            'md5': 'd2cf443931b6148e27638650e2638297',
+            'info_dict': {
+                'id': '2013-11-15T16_31_21-08_00',
+                'ext': 'mp3',
+                'uploader': 'Ostbahnhof / Techno Mix',
+                'uploader_id': 'ostbahnhof',
+                'title': 'Einunddreizig',
+                'duration': 3799,
+            }
+        },
+    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -32,10 +46,12 @@ class PodomaticIE(InfoExtractor):
                     '?permalink=true&rtmp=0') %
                    (mobj.group('proto'), channel, video_id))
        data_json = self._download_webpage(
-            json_url, video_id, note=u'Downloading video info')
+            json_url, video_id, 'Downloading video info')
        data = json.loads(data_json)

        video_url = data['downloadLink']
+        if not video_url:
+            video_url = '%s/%s' % (data['streamer'].replace('rtmp', 'http'), data['mediaLocation'])
        uploader = data['podcast']
        title = data['title']
        thumbnail = data['imageLocation']
@@ -160,6 +160,7 @@ class ProSiebenSat1IE(InfoExtractor):
    _CLIPID_REGEXES = [
        r'"clip_id"\s*:\s+"(\d+)"',
        r'clipid: "(\d+)"',
+        r'clipId=(\d+)',
    ]
    _TITLE_REGEXES = [
        r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
@@ -46,7 +46,8 @@ class PyvideoIE(InfoExtractor):
            return self.url_result(m_youtube.group(1), 'Youtube')

        title = self._html_search_regex(
-            r'<div class="section">.*?<h3>([^>]+?)</h3>', webpage, 'title', flags=re.DOTALL)
+            r'<div class="section">.*?<h3(?:\s+class="[^"]*")?>([^>]+?)</h3>',
+            webpage, 'title', flags=re.DOTALL)
        video_url = self._search_regex(
            [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
            webpage, 'video url', flags=re.DOTALL)
@@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+
+
+class RTBFIE(InfoExtractor):
+    _VALID_URL = r'https?://www.rtbf.be/video/[^\?]+\?id=(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
+        'md5': '799f334ddf2c0a582ba80c44655be570',
+        'info_dict': {
+            'id': '1921274',
+            'ext': 'mp4',
+            'title': 'Les Diables au coeur (épisode 2)',
+            'description': 'Football - Diables Rouges',
+            'duration': 3099,
+            'timestamp': 1398456336,
+            'upload_date': '20140425',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        page = self._download_webpage('https://www.rtbf.be/video/embed?id=%s' % video_id, video_id)
+
+        data = json.loads(self._html_search_regex(
+            r'<div class="js-player-embed" data-video="([^"]+)"', page, 'data video'))['data']
+
+        video_url = data.get('downloadUrl') or data.get('url')
+
+        if data['provider'].lower() == 'youtube':
+            return self.url_result(video_url, 'Youtube')
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': data['title'],
+            'description': data.get('description') or data.get('subtitle'),
+            'thumbnail': data['thumbnail']['large'],
+            'duration': data.get('duration') or data.get('realDuration'),
+            'timestamp': data['created'],
+            'view_count': data['viewCount'],
+        }
@@ -43,13 +43,14 @@ class RutubeIE(InfoExtractor):
            'http://rutube.ru/api/video/%s/?format=json' % video_id,
            video_id, 'Downloading video JSON')

-        trackinfo = self._download_json(
-            'http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
-            video_id, 'Downloading trackinfo JSON')
-
        # Some videos don't have the author field
-        author = trackinfo.get('author') or {}
-        m3u8_url = trackinfo['video_balancer'].get('m3u8')
+        author = video.get('author') or {}
+
+        options = self._download_json(
+            'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
+            video_id, 'Downloading options JSON')
+
+        m3u8_url = options['video_balancer'].get('m3u8')
        if m3u8_url is None:
            raise ExtractorError('Couldn\'t find m3u8 manifest url')

@@ -0,0 +1,60 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class SciVeeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?scivee\.tv/node/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://www.scivee.tv/node/62352',
+        #'md5': 'b16699b74c9e6a120f6772a44960304f',
+        'info_dict': {
+            'id': '62352',
+            'ext': 'mp4',
+            'title': 'Adam Arkin at the 2014 DOE JGI Genomics of Energy & Environment Meeting',
+            'description': 'md5:81f1710638e11a481358fab1b11059d7',
+        },
+        'params': {
+            # Range HTTP header is ignored
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        # annotations XML is malformed
+        annotations = self._download_webpage(
+            'http://www.scivee.tv/assets/annotations/%s' % video_id, video_id, 'Downloading annotations')
+
+        title = self._html_search_regex(r'<title>([^<]+)</title>', annotations, 'title')
+        description = self._html_search_regex(r'<abstract>([^<]+)</abstract>', annotations, 'abstract', fatal=False)
+        filesize = int_or_none(self._html_search_regex(
+            r'<filesize>([^<]+)</filesize>', annotations, 'filesize', fatal=False))
+
+        formats = [
+            {
+                'url': 'http://www.scivee.tv/assets/audio/%s' % video_id,
+                'ext': 'mp3',
+                'format_id': 'audio',
+            },
+            {
+                'url': 'http://www.scivee.tv/assets/video/%s' % video_id,
+                'ext': 'mp4',
+                'format_id': 'video',
+                'filesize': filesize,
+            },
+        ]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': 'http://www.scivee.tv/assets/videothumb/%s' % video_id,
+            'formats': formats,
+        }
@@ -39,7 +39,8 @@ class SlideshareIE(InfoExtractor):
        ext = info['jsplayer']['video_extension']
        video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
        description = self._html_search_regex(
-            r'<p class="description.*?"[^>]*>(.*?)</p>', webpage, 'description')
+            r'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>', webpage,
+            'description', fatal=False)

        return {
            '_type': 'video',
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -8,78 +10,114 @@ from ..utils import (


 class SteamIE(InfoExtractor):
-    _VALID_URL = r"""http://store\.steampowered\.com/
-                (agecheck/)?
-                (?P<urltype>video|app)/ #If the page is only for videos or for a game
-                (?P<gameID>\d+)/?
-                (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
-                """
+    _VALID_URL = r"""(?x)
+        https?://store\.steampowered\.com/
+            (agecheck/)?
+            (?P<urltype>video|app)/ #If the page is only for videos or for a game
+            (?P<gameID>\d+)/?
+            (?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
+        |
+        https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
+    """
    _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
    _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
-    _TEST = {
-        u"url": u"http://store.steampowered.com/video/105600/",
-        u"playlist": [
+    _TESTS = [{
+        "url": "http://store.steampowered.com/video/105600/",
+        "playlist": [
            {
-                u"file": u"81300.flv",
-                u"md5": u"f870007cee7065d7c76b88f0a45ecc07",
-                u"info_dict": {
-                        u"title": u"Terraria 1.1 Trailer",
-                        u'playlist_index': 1,
+                "md5": "f870007cee7065d7c76b88f0a45ecc07",
+                "info_dict": {
+                    'id': '81300',
+                    'ext': 'flv',
+                    "title": "Terraria 1.1 Trailer",
+                    'playlist_index': 1,
                }
            },
            {
-                u"file": u"80859.flv",
-                u"md5": u"61aaf31a5c5c3041afb58fb83cbb5751",
-                u"info_dict": {
-                    u"title": u"Terraria Trailer",
-                    u'playlist_index': 2,
+                "md5": "61aaf31a5c5c3041afb58fb83cbb5751",
+                "info_dict": {
+                    'id': '80859',
+                    'ext': 'flv',
+                    "title": "Terraria Trailer",
+                    'playlist_index': 2,
                }
            }
-        ]
-    }
-
-
-    @classmethod
-    def suitable(cls, url):
-        """Receives a URL and returns True if suitable for this IE."""
-        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
+        ],
+        'params': {
+            'playlistend': 2,
+        }
+    }, {
+        'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205',
+        'info_dict': {
+            'id': 'WB5DvDOOvAY',
+            'ext': 'mp4',
+            'upload_date': '20140329',
+            'title': 'FRONTIERS - Final Greenlight Trailer',
+            'description': "The final trailer for the Steam Greenlight launch. Hooray, progress! Here's the official Greenlight page: http://steamcommunity.com/sharedfiles/filedetails/?id=242472205",
+            'uploader': 'AAD Productions',
+            'uploader_id': 'AtomicAgeDogGames',
+        }
+    }]

    def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url, re.VERBOSE)
-        gameID = m.group('gameID')
-
-        videourl = self._VIDEO_PAGE_TEMPLATE % gameID
-        webpage = self._download_webpage(videourl, gameID)
+        m = re.match(self._VALID_URL, url)
+        fileID = m.group('fileID')
+        if fileID:
+            videourl = url
+            playlist_id = fileID
+        else:
+            gameID = m.group('gameID')
+            playlist_id = gameID
+            videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id
+        webpage = self._download_webpage(videourl, playlist_id)

        if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
-            videourl = self._AGECHECK_TEMPLATE % gameID
+            videourl = self._AGECHECK_TEMPLATE % playlist_id
            self.report_age_confirmation()
-            webpage = self._download_webpage(videourl, gameID)
+            webpage = self._download_webpage(videourl, playlist_id)

-        self.report_extraction(gameID)
-        game_title = self._html_search_regex(r'<h2 class="pageheader">(.*?)</h2>',
-                                             webpage, 'game title')
+        if fileID:
+            playlist_title = self._html_search_regex(
+                r'<div class="workshopItemTitle">(.+)</div>', webpage, 'title')
+            mweb = re.finditer(r'''(?x)
+                'movie_(?P<videoID>[0-9]+)':\s*\{\s*
+                YOUTUBE_VIDEO_ID:\s*"(?P<youtube_id>[^"]+)",
+                ''', webpage)
+            videos = [{
+                '_type': 'url',
+                'url': vid.group('youtube_id'),
+                'ie_key': 'Youtube',
+            } for vid in mweb]
+        else:
+            playlist_title = self._html_search_regex(
+                r'<h2 class="pageheader">(.*?)</h2>', webpage, 'game title')

-        urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
-        mweb = re.finditer(urlRE, webpage)
-        namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
-        titles = re.finditer(namesRE, webpage)
-        thumbsRE = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">'
-        thumbs = re.finditer(thumbsRE, webpage)
-        videos = []
-        for vid,vtitle,thumb in zip(mweb,titles,thumbs):
-            video_id = vid.group('videoID')
-            title = vtitle.group('videoName')
-            video_url = vid.group('videoURL')
-            video_thumb = thumb.group('thumbnail')
-            if not video_url:
-                raise ExtractorError(u'Cannot find video url for %s' % video_id)
-            info = {
-                'id':video_id,
-                'url':video_url,
-                'ext': 'flv',
-                'title': unescapeHTML(title),
-                'thumbnail': video_thumb
-                  }
-            videos.append(info)
-        return [self.playlist_result(videos, gameID, game_title)]
+            mweb = re.finditer(r'''(?x)
+                'movie_(?P<videoID>[0-9]+)':\s*\{\s*
+                FILENAME:\s*"(?P<videoURL>[\w:/\.\?=]+)"
+                (,\s*MOVIE_NAME:\s*\"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},
+                ''', webpage)
+            titles = re.finditer(
+                r'<span class="title">(?P<videoName>.+?)</span>', webpage)
+            thumbs = re.finditer(
+                r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">', webpage)
+            videos = []
+
+            for vid, vtitle, thumb in zip(mweb, titles, thumbs):
+                video_id = vid.group('videoID')
+                title = vtitle.group('videoName')
+                video_url = vid.group('videoURL')
+                video_thumb = thumb.group('thumbnail')
+                if not video_url:
+                    raise ExtractorError('Cannot find video url for %s' % video_id)
+                videos.append({
+                    'id': video_id,
+                    'url': video_url,
+                    'ext': 'flv',
+                    'title': unescapeHTML(title),
+                    'thumbnail': video_thumb
+                })
+        if not videos:
+            raise ExtractorError('Could not find any videos')
+
+        return self.playlist_result(videos, playlist_id, playlist_title)
@@ -6,9 +6,9 @@ from .common import InfoExtractor


 class SyfyIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.syfy\.com/videos/.+?vid:(?P<id>\d+)'
+    _VALID_URL = r'https?://www\.syfy\.com/(?:videos/.+?vid:(?P<id>[0-9]+)|(?!videos)(?P<video_name>[^/]+)(?:$|[?#]))'

-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458',
        'md5': 'e07de1d52c7278adbb9b9b1c93a66849',
        'info_dict': {
@@ -18,10 +18,30 @@ class SyfyIE(InfoExtractor):
            'description': 'Listen to what insights George Lucas give his daughter Amanda.',
        },
        'add_ie': ['ThePlatform'],
-    }
+    }, {
+        'url': 'http://www.syfy.com/wilwheaton',
+        'md5': '94dfa54ee3ccb63295b276da08c415f6',
+        'info_dict': {
+            'id': '4yoffOOXC767',
+            'ext': 'flv',
+            'title': 'The Wil Wheaton Project - Premiering May 27th at 10/9c.',
+            'description': 'The Wil Wheaton Project premieres May 27th at 10/9c. Don\'t miss it.',
+        },
+        'add_ie': ['ThePlatform'],
+        'skip': 'Blocked outside the US',
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_name = mobj.group('video_name')
+        if video_name:
+            generic_webpage = self._download_webpage(url, video_name)
+            video_id = self._search_regex(
+                r'<iframe.*?class="video_iframe_page"\s+src="/_utils/video/thP_video_controller.php.*?_vid([0-9]+)">',
+                generic_webpage, 'video ID')
+            url = 'http://www.syfy.com/videos/%s/%s/vid:%s' % (
+                video_name, video_name, video_id)
+        else:
+            video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        return self.url_result(self._og_search_video_url(webpage))
@@ -3,9 +3,6 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)


 class TeamcocoIE(InfoExtractor):
@@ -49,6 +49,16 @@ class TEDIE(SubtitlesInfoExtractor):
            'thumbnail': 're:^https?://.+\.jpg',
            'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
        }
+    }, {
+        'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
+        'md5': '49144e345a899b8cb34d315f3b9cfeeb',
+        'info_dict': {
+            'id': '1972',
+            'ext': 'mp4',
+            'title': 'Be passionate. Be courageous. Be your best.',
+            'uploader': 'Gabby Giffords and Mark Kelly',
+            'description': 'md5:5174aed4d0f16021b704120360f72b92',
+        },
    }]

    _NATIVE_FORMATS = {
@@ -84,7 +94,7 @@ class TEDIE(SubtitlesInfoExtractor):
        playlist_info = info['playlist']

        playlist_entries = [
-            self.url_result(u'http://www.ted.com/talks/' + talk['slug'], self.ie_key())
+            self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
            for talk in info['talks']
        ]
        return self.playlist_result(
@@ -102,11 +112,23 @@ class TEDIE(SubtitlesInfoExtractor):
            'url': format_url,
            'format_id': format_id,
            'format': format_id,
-        } for (format_id, format_url) in talk_info['nativeDownloads'].items()]
-        for f in formats:
-            finfo = self._NATIVE_FORMATS.get(f['format_id'])
-            if finfo:
-                f.update(finfo)
+        } for (format_id, format_url) in talk_info['nativeDownloads'].items() if format_url is not None]
+        if formats:
+            for f in formats:
+                finfo = self._NATIVE_FORMATS.get(f['format_id'])
+                if finfo:
+                    f.update(finfo)
+        else:
+            # Use rtmp downloads
+            formats = [{
+                'format_id': f['name'],
+                'url': talk_info['streamer'],
+                'play_path': f['file'],
+                'ext': 'flv',
+                'width': f['width'],
+                'height': f['height'],
+                'tbr': f['bitrate'],
+            } for f in talk_info['resources']['rtmp']]
        self._sort_formats(formats)

        video_id = compat_str(talk_info['id'])
@@ -138,7 +160,7 @@ class TEDIE(SubtitlesInfoExtractor):
                sub_lang_list[l] = url
            return sub_lang_list
        else:
-            self._downloader.report_warning(u'video doesn\'t have subtitles')
+            self._downloader.report_warning('video doesn\'t have subtitles')
            return {}

    def _watch_info(self, url, name):
@@ -153,7 +175,10 @@ class TEDIE(SubtitlesInfoExtractor):
        title = self._html_search_regex(
            r"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>", webpage, 'title')
        description = self._html_search_regex(
-            r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
+            [
+                r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
+                r'(?s)<p><strong>About this talk:</strong>\s+(.*?)</p>',
+            ],
            webpage, 'description', fatal=False)

        return {
@@ -52,7 +52,7 @@ class ThePlatformIE(InfoExtractor):
        head = meta.find(_x('smil:head'))
        body = meta.find(_x('smil:body'))

-        f4m_node = body.find(_x('smil:seq/smil:video'))
+        f4m_node = body.find(_x('smil:seq//smil:video'))
        if f4m_node is not None:
            f4m_url = f4m_node.attrib['src']
            if 'manifest.f4m?' not in f4m_url:
@@ -0,0 +1,60 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveIE
+from .discovery import DiscoveryIE
+
+
+class TlcIE(DiscoveryIE):
+    IE_NAME = 'tlc.com'
+    _VALID_URL = r'http://www\.tlc\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
+
+    _TEST = {
+        'url': 'http://www.tlc.com/tv-shows/cake-boss/videos/too-big-to-fly.htm',
+        'md5': 'c4038f4a9b44d0b5d74caaa64ed2a01a',
+        'info_dict': {
+            'id': '853232',
+            'ext': 'mp4',
+            'title': 'Cake Boss: Too Big to Fly',
+            'description': 'Buddy has taken on a high flying task.',
+            'duration': 119,
+        },
+    }
+
+
+class TlcDeIE(InfoExtractor):
+    IE_NAME = 'tlc.de'
+    _VALID_URL = r'http://www\.tlc\.de/sendungen/[^/]+/videos/(?P<title>[^/?]+)'
+
+    _TEST = {
+        'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
+        'info_dict': {
+            'id': '3235167922001',
+            'ext': 'mp4',
+            'title': 'Breaking Amish: Die Welt da draußen',
+            'uploader': 'Discovery Networks - Germany',
+            'description': 'Vier Amische und eine Mennonitin wagen in New York'
+                '  den Sprung in ein komplett anderes Leben. Begleitet sie auf'
+                ' ihrem spannenden Weg.',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        title = mobj.group('title')
+        webpage = self._download_webpage(url, title)
+        iframe_url = self._search_regex(
+            '<iframe src="(http://www\.tlc\.de/wp-content/.+?)"', webpage,
+            'iframe url')
+        # Otherwise we don't get the correct 'BrightcoveExperience' element,
+        # example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/
+        iframe_url = iframe_url.replace('.htm?', '.php?')
+        iframe = self._download_webpage(iframe_url, title)
+
+        return {
+            '_type': 'url',
+            'url': BrightcoveIE._extract_brightcove_url(iframe),
+            'ie': BrightcoveIE.ie_key(),
+        }
@@ -1,63 +1,83 @@
-import os
+from __future__ import unicode_literals
+
+import json
 import re

 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse_urlparse,
    compat_urllib_request,
+    int_or_none,
+    str_to_int,
 )
-from ..aes import (
-    aes_decrypt_text
-)
+from ..aes import aes_decrypt_text
+

 class Tube8IE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/.+?/(?P<videoid>\d+)/?)$'
+    _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P<id>\d+)'
    _TEST = {
-        u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
-        u'file': u'229795.mp4',
-        u'md5': u'e9e0b0c86734e5e3766e653509475db0',
-        u'info_dict': {
-            u"description": u"hot teen Kasia grinding", 
-            u"uploader": u"unknown", 
-            u"title": u"Kasia music video",
-            u"age_limit": 18,
+        'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
+        'file': '229795.mp4',
+        'md5': 'e9e0b0c86734e5e3766e653509475db0',
+        'info_dict': {
+            'description': 'hot teen Kasia grinding',
+            'uploader': 'unknown',
+            'title': 'Kasia music video',
+            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
-        url = 'http://www.' + mobj.group('url')
+        video_id = mobj.group('id')

        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

-        video_title = self._html_search_regex(r'videotitle	="([^"]+)', webpage, u'title')
-        video_description = self._html_search_regex(r'>Description:</strong>(.+?)<', webpage, u'description', fatal=False)
-        video_uploader = self._html_search_regex(r'>Submitted by:</strong>(?:\s|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False)
-        thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
-        if thumbnail:
-            thumbnail = thumbnail.replace('\\/', '/')
+        flashvars = json.loads(self._html_search_regex(
+            r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars'))

-        video_url = self._html_search_regex(r'"video_url":"([^"]+)', webpage, u'video_url')
-        if webpage.find('"encrypted":true')!=-1:
-            password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password')
-            video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
+        video_url = flashvars['video_url']
+        if flashvars.get('encrypted') is True:
+            video_url = aes_decrypt_text(video_url, flashvars['video_title'], 32).decode('utf-8')
        path = compat_urllib_parse_urlparse(video_url).path
-        extension = os.path.splitext(path)[1][1:]
-        format = path.split('/')[4].split('_')[:2]
-        format = "-".join(format)
+        format_id = '-'.join(path.split('/')[4].split('_')[:2])
+
+        thumbnail = flashvars.get('image_url')
+
+        title = self._html_search_regex(
+            r'videotitle\s*=\s*"([^"]+)', webpage, 'title')
+        description = self._html_search_regex(
+            r'>Description:</strong>(.+?)<', webpage, 'description', fatal=False)
+        uploader = self._html_search_regex(
+            r'<strong class="video-username">(?:<a href="[^"]+">)?([^<]+)(?:</a>)?</strong>',
+            webpage, 'uploader', fatal=False)
+
+        like_count = int_or_none(self._html_search_regex(
+            r"rupVar\s*=\s*'(\d+)'", webpage, 'like count', fatal=False))
+        dislike_count = int_or_none(self._html_search_regex(
+            r"rdownVar\s*=\s*'(\d+)'", webpage, 'dislike count', fatal=False))
+        view_count = self._html_search_regex(
+            r'<strong>Views: </strong>([\d,\.]+)</li>', webpage, 'view count', fatal=False)
+        if view_count:
+            view_count = str_to_int(view_count)
+        comment_count = self._html_search_regex(
+            r'<span id="allCommentsCount">(\d+)</span>', webpage, 'comment count', fatal=False)
+        if comment_count:
+            comment_count = str_to_int(comment_count)

        return {
            'id': video_id,
-            'uploader': video_uploader,
-            'title': video_title,
-            'thumbnail': thumbnail,
-            'description': video_description,
            'url': video_url,
-            'ext': extension,
-            'format': format,
-            'format_id': format,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'format_id': format_id,
+            'view_count': view_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'comment_count': comment_count,
            'age_limit': 18,
        }
@@ -17,10 +17,38 @@ from ..utils import (
    RegexNotFoundError,
    std_headers,
    unsmuggle_url,
+    urlencode_postdata,
 )


-class VimeoIE(SubtitlesInfoExtractor):
+class VimeoBaseInfoExtractor(InfoExtractor):
+    _NETRC_MACHINE = 'vimeo'
+    _LOGIN_REQUIRED = False
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            if self._LOGIN_REQUIRED:
+                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+            return
+        self.report_login()
+        login_url = 'https://vimeo.com/log_in'
+        webpage = self._download_webpage(login_url, None, False)
+        token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
+        data = urlencode_postdata({
+            'email': username,
+            'password': password,
+            'action': 'login',
+            'service': 'vimeo',
+            'token': token,
+        })
+        login_request = compat_urllib_request.Request(login_url, data)
+        login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        login_request.add_header('Cookie', 'xsrft=%s' % token)
+        self._download_webpage(login_request, None, False, 'Wrong login info')
+
+
+class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
    """Information extractor for vimeo.com."""

    # _VALID_URL matches Vimeo URLs
@@ -33,7 +61,6 @@ class VimeoIE(SubtitlesInfoExtractor):
        (?:videos?/)?
        (?P<id>[0-9]+)
        /?(?:[?&].*)?(?:[#].*)?$'''
-    _NETRC_MACHINE = 'vimeo'
    IE_NAME = 'vimeo'
    _TESTS = [
        {
@@ -111,25 +138,6 @@ class VimeoIE(SubtitlesInfoExtractor):
        else:
            return super(VimeoIE, cls).suitable(url)

-    def _login(self):
-        (username, password) = self._get_login_info()
-        if username is None:
-            return
-        self.report_login()
-        login_url = 'https://vimeo.com/log_in'
-        webpage = self._download_webpage(login_url, None, False)
-        token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
-        data = compat_urllib_parse.urlencode({'email': username,
-                                              'password': password,
-                                              'action': 'login',
-                                              'service': 'vimeo',
-                                              'token': token,
-                                              })
-        login_request = compat_urllib_request.Request(login_url, data)
-        login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        login_request.add_header('Cookie', 'xsrft=%s' % token)
-        self._download_webpage(login_request, None, False, 'Wrong login info')
-
    def _verify_video_password(self, url, video_id, webpage):
        password = self._downloader.params.get('videopassword', None)
        if password is None:
@@ -438,3 +446,25 @@ class VimeoReviewIE(InfoExtractor):
        video_id = mobj.group('id')
        player_url = 'https://player.vimeo.com/player/' + video_id
        return self.url_result(player_url, 'Vimeo', video_id)
+
+
+class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
+    IE_NAME = 'vimeo:watchlater'
+    IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
+    _VALID_URL = r'https?://vimeo\.com/home/watchlater|:vimeowatchlater'
+    _LOGIN_REQUIRED = True
+    _TITLE_RE = r'href="/home/watchlater".*?>(.*?)<'
+
+    def _real_initialize(self):
+        self._login()
+
+    def _page_url(self, base_url, pagenum):
+        url = '%s/page:%d/' % (base_url, pagenum)
+        request = compat_urllib_request.Request(url)
+        # Set the header to get a partial html page with the ids,
+        # the normal page doesn't contain them.
+        request.add_header('X-Requested-With', 'XMLHttpRequest')
+        return request
+
+    def _real_extract(self, url):
+        return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater')
@@ -0,0 +1,66 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse_urlparse,
+    parse_duration,
+    qualities,
+)
+
+
+class VuClipIE(InfoExtractor):
+    _VALID_URL = r'http://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'
+
+    _TEST = {
+        'url': 'http://m.vuclip.com/w?cid=843902317&fid=63532&z=1007&nvar&frm=index.html&bu=4757321434',
+        'md5': '92ac9d1ccefec4f0bb474661ab144fcf',
+        'info_dict': {
+            'id': '843902317',
+            'ext': '3gp',
+            'title': 'Movie Trailer: Noah',
+            'duration': 139,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        ad_m = re.search(
+            r'''value="No.*?" onClick="location.href='([^"']+)'"''', webpage)
+        if ad_m:
+            urlr = compat_urllib_parse_urlparse(url)
+            adfree_url = urlr.scheme + '://' + urlr.netloc + ad_m.group(1)
+            webpage = self._download_webpage(
+                adfree_url, video_id, note='Download post-ad page')
+
+        links_code = self._search_regex(
+            r'(?s)<div class="social align_c".*?>(.*?)<hr\s*/?>', webpage,
+            'links')
+        title = self._html_search_regex(
+            r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip()
+
+        quality_order = qualities(['Reg', 'Hi'])
+        formats = []
+        for url, q in re.findall(
+                r'<a href="(?P<url>[^"]+)".*?>(?P<q>[^<]+)</a>', links_code):
+            format_id = compat_urllib_parse_urlparse(url).scheme + '-' + q
+            formats.append({
+                'format_id': format_id,
+                'url': url,
+                'quality': quality_order(q),
+            })
+        self._sort_formats(formats)
+
+        duration = parse_duration(self._search_regex(
+            r'\(([0-9:]+)\)</span></h1>', webpage, 'duration', fatal=False))
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': title,
+            'duration': duration,
+        }
@@ -1,10 +1,11 @@
 # coding: utf-8
+from __future__ import unicode_literals

 import re
-import json

 from .common import InfoExtractor

+
 class WeiboIE(InfoExtractor):
    """
    The videos in Weibo come from different sites, this IE just finds the link
@@ -13,16 +14,16 @@ class WeiboIE(InfoExtractor):
    _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'

    _TEST = {
-        u'add_ie': ['Sina'],
-        u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
-        u'file': u'98322879.flv',
-        u'info_dict': {
-            u'title': u'魔声耳机最新广告“All Eyes On Us”',
+        'url': 'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
+        'info_dict': {
+            'id': '98322879',
+            'ext': 'flv',
+            'title': '魔声耳机最新广告“All Eyes On Us”',
        },
-        u'note': u'Sina video',
-        u'params': {
-            u'skip_download': True,
+        'params': {
+            'skip_download': True,
        },
+        'add_ie': ['Sina'],
    }

    # Additional example videos from different sites
@@ -33,17 +34,16 @@ class WeiboIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
        video_id = mobj.group('id')
        info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
-        info_page = self._download_webpage(info_url, video_id)
-        info = json.loads(info_page)
+        info = self._download_json(info_url, video_id)

        videos_urls = map(lambda v: v['play_page_url'], info['result']['data'])
-        #Prefer sina video since they have thumbnails
-        videos_urls = sorted(videos_urls, key=lambda u: u'video.sina.com' in u)
+        # Prefer sina video since they have thumbnails
+        videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u)
        player_url = videos_urls[-1]
-        m_sina = re.match(r'https?://video.sina.com.cn/v/b/(\d+)-\d+.html', player_url)
+        m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html',
+            player_url)
        if m_sina is not None:
            self.to_screen('Sina video detected')
            sina_id = m_sina.group(1)
            player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
        return self.url_result(player_url)
-
@@ -14,8 +14,8 @@ from ..utils import (


 class YahooIE(InfoExtractor):
-    IE_DESC = 'Yahoo screen'
-    _VALID_URL = r'https?://screen\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'
+    IE_DESC = 'Yahoo screen and movies'
+    _VALID_URL = r'https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'
    _TESTS = [
        {
            'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
@@ -37,6 +37,16 @@ class YahooIE(InfoExtractor):
                'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
            },
        },
+        {
+            'url': 'https://movies.yahoo.com/video/world-loves-spider-man-190819223.html',
+            'md5': '410b7104aa9893b765bc22787a22f3d9',
+            'info_dict': {
+                'id': '516ed8e2-2c4f-339f-a211-7a8b49d30845',
+                'ext': 'mp4',
+                'title': 'The World Loves Spider-Man',
+                'description': '''People all over the world are celebrating the release of \"The Amazing Spider-Man 2.\" We're taking a look at the enthusiastic response Spider-Man has received from viewers all over the world.''',
+            }
+        }
    ]

    def _real_extract(self, url):
@@ -44,13 +54,20 @@ class YahooIE(InfoExtractor):
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

-        items_json = self._search_regex(r'mediaItems: ({.*?})$',
-            webpage, 'items', flags=re.MULTILINE)
-        items = json.loads(items_json)
-        info = items['mediaItems']['query']['results']['mediaObj'][0]
-        # The 'meta' field is not always in the video webpage, we request it
-        # from another page
-        long_id = info['id']
+        items_json = self._search_regex(
+            r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
+            default=None)
+        if items_json is None:
+            long_id = self._search_regex(
+                r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
+                webpage, 'content ID')
+            video_id = long_id
+        else:
+            items = json.loads(items_json)
+            info = items['mediaItems']['query']['results']['mediaObj'][0]
+            # The 'meta' field is not always in the video webpage, we request it
+            # from another page
+            long_id = info['id']
        return self._get_info(long_id, video_id)

    def _get_info(self, long_id, video_id):
@@ -104,7 +121,7 @@ class YahooNewsIE(YahooIE):
    IE_NAME = 'yahoo:news'
    _VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html'

-    _TEST = {
+    _TESTS = [{
        'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
        'md5': '67010fdf3a08d290e060a4dd96baa07b',
        'info_dict': {
@@ -113,10 +130,7 @@ class YahooNewsIE(YahooIE):
            'title': 'China Moses Is Crazy About the Blues',
            'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
        },
-    }
-
-    # Overwrite YahooIE properties we don't want
-    _TESTS = []
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -151,6 +151,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                             )
                         ))
                         |youtu\.be/                                          # just youtu.be/xxxx
+                         |https?://(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
@@ -209,23 +210,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},

        # Dash webm
-        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
-        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
-        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
-        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
-        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
-        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
-        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH webm', 'preference': -40},
-        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH webm', 'preference': -40},
-        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
-        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
-        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
-        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH webm', 'preference': -40},
-        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH webm', 'preference': -40},
+        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
+        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},

        # Dash webm audio
-        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50},
-        '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 256, 'preference': -50},
+        '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
+        '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},
@@ -251,7 +252,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            u"info_dict": {
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
-                u"description": u"md5:5b292926389560516e384ac437c0ec07",
+                u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            }
@@ -303,7 +304,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                u'id': u'IB3lcPjvWLA',
                u'ext': u'm4a',
                u'title': u'Afrojack - The Spark ft. Spree Wilson',
-                u'description': u'md5:3199ed45ee8836572865580804d7ac0f',
+                u'description': u'md5:9717375db5a9a3992be4668bbf3bc0a8',
                u'uploader': u'AfrojackVEVO',
                u'uploader_id': u'AfrojackVEVO',
                u'upload_date': u'20131011',
@@ -1081,9 +1082,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                    break
        if 'token' not in video_info:
            if 'reason' in video_info:
-                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
+                raise ExtractorError(
+                    u'YouTube said: %s' % video_info['reason'][0],
+                    expected=True, video_id=video_id)
            else:
-                raise ExtractorError(u'"token" parameter not in video info for unknown reason')
+                raise ExtractorError(
+                    u'"token" parameter not in video info for unknown reason',
+                    video_id=video_id)

        if 'view_count' in video_info:
            view_count = int(video_info['view_count'][0])
@@ -1112,7 +1117,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):

        # title
        if 'title' in video_info:
-            video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
+            video_title = video_info['title'][0]
        else:
            self._downloader.report_warning(u'Unable to extract video title')
            video_title = u'_'
@@ -1418,7 +1423,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
-                self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+                self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        if playlist_id.startswith('RD'):
            # Mixes require a custom extraction process
@@ -1,5 +1,7 @@

+from .atomicparsley import AtomicParsleyPP
 from .ffmpeg import (
+    FFmpegAudioFixPP,
    FFmpegMergerPP,
    FFmpegMetadataPP,
    FFmpegVideoConvertor,
@@ -9,6 +11,8 @@ from .ffmpeg import (
 from .xattrpp import XAttrMetadataPP

 __all__ = [
+    'AtomicParsleyPP',
+    'FFmpegAudioFixPP',
    'FFmpegMergerPP',
    'FFmpegMetadataPP',
    'FFmpegVideoConvertor',
@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+
+import os
+import subprocess
+
+from .common import PostProcessor
+
+from ..utils import (
+    check_executable,
+    compat_urlretrieve,
+    encodeFilename,
+    PostProcessingError,
+    prepend_extension,
+    shell_quote
+)
+
+
+class AtomicParsleyPPError(PostProcessingError):
+    pass
+
+
+class AtomicParsleyPP(PostProcessor):
+    def run(self, info):
+        if not check_executable('AtomicParsley', ['-v']):
+            raise AtomicParsleyPPError('AtomicParsley was not found. Please install.')
+
+        filename = info['filepath']
+        temp_filename = prepend_extension(filename, 'temp')
+        temp_thumbnail = prepend_extension(filename, 'thumb')
+
+        if not info.get('thumbnail'):
+            raise AtomicParsleyPPError('Thumbnail was not found. Nothing to do.')
+
+        compat_urlretrieve(info['thumbnail'], temp_thumbnail)
+
+        cmd = ['AtomicParsley', filename, '--artwork', temp_thumbnail, '-o', temp_filename]
+
+        self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)
+
+        if self._downloader.params.get('verbose', False):
+            self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))
+
+        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        stdout, stderr = p.communicate()
+
+        if p.returncode != 0:
+            msg = stderr.decode('utf-8', 'replace').strip()
+            raise AtomicParsleyPPError(msg)
+
+        os.remove(encodeFilename(filename))
+        os.remove(encodeFilename(temp_thumbnail))
+        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+        return True, info
@@ -53,8 +53,7 @@ class FFmpegPostProcessor(PostProcessor):

        if self._downloader.params.get('verbose', False):
            self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
-        bcmd = [self._downloader.encode(c) for c in cmd]
-        p = subprocess.Popen(bcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = p.communicate()
        if p.returncode != 0:
            stderr = stderr.decode('utf-8', 'replace')
@@ -465,7 +464,11 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')

-        options = ['-c', 'copy']
+        if info['ext'] == u'm4a':
+            options = ['-vn', '-acodec', 'copy']
+        else:
+            options = ['-c', 'copy']
+
        for (name, value) in metadata.items():
            options.extend(['-metadata', '%s=%s' % (name, value)])

@@ -484,3 +487,17 @@ class FFmpegMergerPP(FFmpegPostProcessor):
        self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
        return True, info

+
+class FFmpegAudioFixPP(FFmpegPostProcessor):
+    def run(self, info):
+        filename = info['filepath']
+        temp_filename = prepend_extension(filename, 'temp')
+
+        options = ['-vn', '-acodec', 'copy']
+        self._downloader.to_screen(u'[ffmpeg] Fixing audio file "%s"' % filename)
+        self.run_ffmpeg(filename, temp_filename, options)
+
+        os.remove(encodeFilename(filename))
+        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+        return True, info
@@ -594,13 +594,15 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs):

 class ExtractorError(Exception):
    """Error during info extraction."""
-    def __init__(self, msg, tb=None, expected=False, cause=None):
+    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """

        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
+        if video_id is not None:
+            msg = video_id + ': ' + msg
        if not expected:
            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.'
        super(ExtractorError, self).__init__(msg)
@@ -608,6 +610,7 @@ class ExtractorError(Exception):
        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
+        self.video_id = video_id

    def format_traceback(self):
        if self.traceback is None:
@@ -910,11 +913,83 @@ def platform_name():
    return res


+def _windows_write_string(s, out):
+    """ Returns True if the string was written using special methods,
+    False if it has yet to be written out."""
+    # Adapted from http://stackoverflow.com/a/3259271/35070
+
+    import ctypes
+    import ctypes.wintypes
+
+    WIN_OUTPUT_IDS = {
+        1: -11,
+        2: -12,
+    }
+
+    fileno = out.fileno()
+    if fileno not in WIN_OUTPUT_IDS:
+        return False
+
+    GetStdHandle = ctypes.WINFUNCTYPE(
+        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
+        ("GetStdHandle", ctypes.windll.kernel32))
+    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
+
+    WriteConsoleW = ctypes.WINFUNCTYPE(
+        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
+        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
+        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
+    written = ctypes.wintypes.DWORD(0)
+
+    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
+    FILE_TYPE_CHAR = 0x0002
+    FILE_TYPE_REMOTE = 0x8000
+    GetConsoleMode = ctypes.WINFUNCTYPE(
+        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
+        ctypes.POINTER(ctypes.wintypes.DWORD))(
+        ("GetConsoleMode", ctypes.windll.kernel32))
+    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
+
+    def not_a_console(handle):
+        if handle == INVALID_HANDLE_VALUE or handle is None:
+            return True
+        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
+                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
+
+    if not_a_console(h):
+        return False
+
+    def next_nonbmp_pos(s):
+        try:
+            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
+        except StopIteration:
+            return len(s)
+
+    while s:
+        count = min(next_nonbmp_pos(s), 1024)
+
+        ret = WriteConsoleW(
+            h, s, count if count else 2, ctypes.byref(written), None)
+        if ret == 0:
+            raise OSError('Failed to write string')
+        if not count:  # We just wrote a non-BMP character
+            assert written.value == 2
+            s = s[1:]
+        else:
+            assert written.value > 0
+            s = s[written.value:]
+    return True
+
+
 def write_string(s, out=None, encoding=None):
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

+    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
+        if _windows_write_string(s, out):
+            return
+
    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
@@ -1173,7 +1248,10 @@ class HEADRequest(compat_urllib_request.Request):
        return "HEAD"


-def int_or_none(v, scale=1, default=None):
+def int_or_none(v, scale=1, default=None, get_attr=None):
+    if get_attr:
+        if v is not None:
+            v = getattr(v, get_attr, None)
    return default if v is None else (int(v) // scale)


@@ -1334,3 +1412,14 @@ US_RATINGS = {

 def strip_jsonp(code):
    return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)
+
+
+def qualities(quality_ids):
+    """ Get a numeric quality value out of a list of possible values """
+    def q(qid):
+        try:
+            return quality_ids.index(qid)
+        except ValueError:
+            return -1
+    return q
+
@@ -1,2 +1,2 @@

-__version__ = '2014.04.07.2'
+__version__ = '2014.04.30'
Author	SHA1	Message	Date
Philipp Hagemeister	f1f25be6db	release 2014.04.30	2014-04-30 02:05:03 +02:00
Philipp Hagemeister	deab8c1960	Merge branch 'master' of github.com:rg3/youtube-dl	2014-04-30 02:04:55 +02:00
Philipp Hagemeister	c57f775710	[YoutubeDL] Add simple tests for format_note (Closes #2825 )	2014-04-30 02:02:41 +02:00
AGSPhoenix	e75cafe9fb	Clean up format list for consistency This should make the format list output look a bit nicer.	2014-04-30 01:52:05 +02:00
Philipp Hagemeister	33ab8453c4	Merge pull request #2813 from dstftw/test-real-download-improvement Improve download mechanism when Range HTTP header is ignored	2014-04-30 01:50:33 +02:00
Philipp Hagemeister	ebd3c7b370	[generic] Add support for protocol-independent URLs (Fixes #2810 )	2014-04-30 01:46:06 +02:00
Philipp Hagemeister	29645a1d44	Merge remote-tracking branch 'pulpe/moviezinese'	2014-04-30 01:37:05 +02:00
Philipp Hagemeister	22d99a801a	[syfy] Add suppor for generic URLs (Fixes #2827 )	2014-04-30 01:35:52 +02:00
Jaime Marquínez Ferrándiz	57b8d84cd9	[5min] Raise an error if the 'success' field is False For example for georestricted videos.	2014-04-29 14:57:38 +02:00
Sergey M․	65e4ad5bfe	[rtbf] Minor changes and YouTube videos support	2014-04-29 19:41:58 +07:00
Nicolas Évrard	98b7d476d9	[RTBFVideo] Remove useless print statement	2014-04-28 23:19:56 +02:00
Nicolas Évrard	201e3c99b9	[RTBFVideo] Add new extractor	2014-04-28 20:32:13 +02:00
Sergey M․	8a7a4a9796	[scivee] Skip test for now	2014-04-28 19:52:32 +07:00
Sergey M․	df297c8794	[http] Improve download mechanism when Range HTTP header is ignored	2014-04-27 09:32:01 +07:00
pulpe	3f53a75f02	[moviezine] Add extractor for moviezine.se (fixes #2808 )	2014-04-26 18:55:29 +02:00
Sergey M․	7c360e3a04	[scivee] Add support for scivee.tv	2014-04-26 20:22:15 +07:00
Sergey M․	d2176c8011	[nrk] Add support for nrk.no (Closes #2804 )	2014-04-25 21:34:44 +07:00
Jaime Marquínez Ferrándiz	aa92f06308	[youtube] Don't call 'unquote_plus' on the video title (fixes #2799 ) It's already unquoted after calling 'compat_parse_qs'. It replaced '+' with spaces, for example in https://www.youtube.com/watch?v=XC0b5YexO-I.	2014-04-25 13:19:03 +02:00
Jaime Marquínez Ferrándiz	e00c9cf599	[youtube] Update test description field	2014-04-25 13:14:15 +02:00
Jaime Marquínez Ferrándiz	ba60a3ebe0	[youtube] Update test description field	2014-04-25 12:57:04 +02:00
Jaime Marquínez Ferrándiz	efb7e11988	[vimeo] Add an extractor for the watch later list (closes #2787 )	2014-04-24 21:51:20 +02:00
Sergey M․	a55c8b7aac	[9gag] Fix post view regex	2014-04-24 19:52:34 +07:00
Jaime Marquínez Ferrándiz	a980bc4324	[vimeo] Fix logging in python 3.x The POST data must be a bytes object.	2014-04-24 14:44:27 +02:00
Sergey M․	4b10aadffc	[dailymotion] Fix user playlist extraction	2014-04-23 19:42:34 +07:00
Sergey M․	5bec574859	[ted] Update test	2014-04-22 19:49:41 +07:00
Philipp Hagemeister	d11271dd29	[youtube] Include video Id in common error message (Fixes #2786 )	2014-04-21 20:34:03 +02:00
Philipp Hagemeister	1d9d26d09b	release 2014.04.21.6	2014-04-21 16:18:32 +02:00
Philipp Hagemeister	c0292e8ab7	[generic] Improve jwplayer detection (Fixes #2731 )	2014-04-21 16:16:53 +02:00
Philipp Hagemeister	f44e5d8b43	[vuclip] Fix VALID_URL regex	2014-04-21 16:14:21 +02:00
Philipp Hagemeister	6ea74538e3	release 2014.04.21.5	2014-04-21 15:56:23 +02:00
Philipp Hagemeister	24b8924b46	[facebook] Correct login (Fixes #2743 )	2014-04-21 15:56:09 +02:00
Philipp Hagemeister	86a3c67112	release 2014.04.21.4	2014-04-21 15:25:16 +02:00
Philipp Hagemeister	8be874370d	Merge branch 'master' of github.com:rg3/youtube-dl	2014-04-21 15:24:51 +02:00
Philipp Hagemeister	aec74dd95a	[vuclip] Add extractor (Fixes #2735 )	2014-04-21 15:24:44 +02:00
Sergey M․	6890574256	[rutube] Add missing whitespace	2014-04-21 19:04:11 +07:00
Sergey M․	d03745c684	[jukebox] Update test md5	2014-04-21 19:00:27 +07:00
Philipp Hagemeister	28746fbd59	[bilibili] Add preliminary support (#2174 ) The URL http://www.bilibili.tv/video/av636603/index_2.html does not work yet.	2014-04-21 13:46:41 +02:00
Philipp Hagemeister	0321213c11	[test_subtitles] Allow more subtitles for TED videos	2014-04-21 13:20:14 +02:00
Philipp Hagemeister	3f0aae4244	release 2014.04.21.3	2014-04-21 12:40:09 +02:00
Philipp Hagemeister	48099643cc	[generic] Be more relaxed when looking for aparat embeds (Fixes #2784 )	2014-04-21 12:37:41 +02:00
Philipp Hagemeister	621f33c9d0	[ted] Extend search for description	2014-04-21 12:37:16 +02:00
Philipp Hagemeister	f07a9f6f43	[ted] Remove superfluous u prefixes	2014-04-21 12:34:32 +02:00
Philipp Hagemeister	e51880fd32	[cnet] Correct JSON capturing	2014-04-21 07:59:29 +02:00
Philipp Hagemeister	88ce273da4	[arte] differentiate JSON outputs	2014-04-21 07:59:16 +02:00
Philipp Hagemeister	b9ba5dfa28	[test helper] Correct only_matching test gathering	2014-04-21 07:56:51 +02:00
Philipp Hagemeister	4086f11929	release 2014.04.21.2	2014-04-21 07:12:12 +02:00
Philipp Hagemeister	478c2c6193	[clubic] Add extractor (Fixes #2773 )	2014-04-21 07:12:02 +02:00
Philipp Hagemeister	d2d6481afb	[mdr] Remove unused imports	2014-04-21 06:49:21 +02:00
Philipp Hagemeister	43acb120f3	release 2014.04.21.1	2014-04-21 06:28:25 +02:00
Philipp Hagemeister	e8f2025edf	[mdr] Add support for modern URLs (Fixes #2775 )	2014-04-21 06:25:21 +02:00
Philipp Hagemeister	a4eb9578af	[yahoo] Add support for movies (Fixes #2780 )	2014-04-21 06:18:04 +02:00
Philipp Hagemeister	fa35cdad02	[condenast\|generic] Add support for condenast embeds (Fixes #2783 )	2014-04-21 05:47:52 +02:00
Philipp Hagemeister	d1b9c912a4	[utils] Fix _windows_write_string (Fixes #2779 ) It turns out that the function did not work for outputs longer than 1024 UCS-2 tokens. Write non-BMP characters one by one to ensure that we count correctly.	2014-04-21 04:59:46 +02:00
Philipp Hagemeister	edec83a025	[infoq] Add support for HTTP downloads (Fixes #722 )	2014-04-21 03:21:34 +02:00
Philipp Hagemeister	c0a7c60815	[infoq] Simplify (#2777 )	2014-04-21 02:55:35 +02:00
Philipp Hagemeister	117a7d1944	Merge remote-tracking branch 'kwbr/master'	2014-04-21 02:48:04 +02:00
Philipp Hagemeister	a40e0dd434	release 2014.04.21	2014-04-21 02:34:53 +02:00
Philipp Hagemeister	188b086dd9	Merge branch 'master' of github.com:rg3/youtube-dl	2014-04-21 02:34:44 +02:00
Philipp Hagemeister	1f27d2c0e1	[steam] Add support for steamcommunity.com (Fixes #2757 )	2014-04-21 02:34:34 +02:00
Kai Weber	7560096db5	[infoq] Simplify playpath calculation	2014-04-20 01:10:30 +02:00
Kai Weber	282cb9c7ba	[infoq] Fix extractor	2014-04-20 01:01:37 +02:00
Sergey M․	3a9d6790ad	[ivi] Update playlist tests	2014-04-20 03:06:50 +07:00
Philipp Hagemeister	0610a3e0b2	Remove unused imports	2014-04-19 19:57:09 +02:00
Philipp Hagemeister	7f9c31df88	[steam] Simplify	2014-04-19 19:55:53 +02:00
Philipp Hagemeister	3fa6b6e293	[steam] Modernize	2014-04-19 19:51:04 +02:00
Philipp Hagemeister	3c50b99ab4	[extremetube] Modernize	2014-04-19 19:42:51 +02:00
Philipp Hagemeister	52fadd5fb2	[test_all_urls] Add support for distributed URL matching test definition	2014-04-19 19:41:06 +02:00
Philipp Hagemeister	5367fe7f4d	[test_all_urls] Simplify	2014-04-19 13:01:15 +02:00
Philipp Hagemeister	427588f6e7	Merge remote-tracking branch 'MikeCol/extremetube-gay'	2014-04-19 12:59:52 +02:00
Philipp Hagemeister	51745be312	release 2014.04.19	2014-04-19 11:55:33 +02:00
Sergey M․	d7f1e7c88f	[rutube] Fix extraction	2014-04-19 15:59:12 +07:00
MikeCol	4145a257be	Extended regex match to include gay clips	2014-04-19 00:29:42 +02:00
Sergey M․	525dc9809e	[noco] Fix test description md5	2014-04-18 21:36:04 +07:00
Sergey M․	1bf3210816	[noco] Add support for noco.tv (Closes #2712 )	2014-04-18 21:11:09 +07:00
Sergey M․	e6c6d10d99	[podomatic] Improve video URL extraction (Closes #2763 )	2014-04-17 19:59:52 +07:00
Jaime Marquínez Ferrándiz	f270256e06	[tlc] Add an extractor for tlc.com It uses the same system as discovery.com	2014-04-16 20:29:31 +02:00
Jaime Marquínez Ferrándiz	f401c6f69f	[canalplus] Download the video in the test It doesn't use rtmpdump now.	2014-04-16 15:54:00 +02:00
Sergey M․	b075d25bed	[canalplus] Prefer f4m and modernize (Closes #2749 )	2014-04-16 20:47:39 +07:00
Jaime Marquínez Ferrándiz	3d1bb6b4dd	Add an extractor for tlc.de (fixes #2748 )	2014-04-16 15:45:05 +02:00
Philipp Hagemeister	1db2666916	[youtube:playlist] Correct playlist ID output The ID now starts with PL, so we don't need to output that twice.	2014-04-15 17:55:52 +02:00
Jaime Marquínez Ferrándiz	8f5c0218d8	[fivemin] Get the 'sid' from the embed page (fixes #2745 ) It allows to download some videos that failed.	2014-04-15 16:18:37 +02:00
Sergey M․	d7666dff82	[9gag] Fix and improve extraction	2014-04-15 19:49:38 +07:00
Jaime Marquínez Ferrándiz	2d4c98dbd1	[ted] Use the rtmp links if there http downloads are not available.	2014-04-14 15:23:12 +02:00
Sergey M․	fd50bf623c	[generic] Modernize tests	2014-04-14 18:56:29 +07:00
Sergey M․	d360a14678	[generic] Update test	2014-04-14 18:51:46 +07:00
Philipp Hagemeister	d0f2ab6969	release 2014.04.13	2014-04-13 03:22:30 +02:00
Philipp Hagemeister	de906ef543	[aol] Add support for playlists (Fixes #2730 )	2014-04-13 03:22:24 +02:00
Sergey M․	2fb3deeca1	[tube8] Fix extraction and modernize	2014-04-13 03:56:32 +07:00
Philipp Hagemeister	66398056f1	Merge branch 'master' of github.com:rg3/youtube-dl	2014-04-12 17:15:16 +02:00
Jaime Marquínez Ferrándiz	77477fa4c9	Merge branch 'atomicparsley' (closes #2436 )	2014-04-12 15:52:42 +02:00
Jaime Marquínez Ferrándiz	a169e18ce1	[atomicparsley] Remove unneeded __init__ method	2014-04-12 15:51:40 +02:00
Jaime Marquínez Ferrándiz	381640e3ac	[brightcove] Only use url from meta element if it has the 'playerKey' field (fixes #2738 )	2014-04-12 12:53:48 +02:00
Sergey M․	37e3410137	[prosiebensat1] Add one more clip id pattern (Closes #2737 )	2014-04-12 02:53:55 +07:00
Jaime Marquínez Ferrándiz	97b5196960	[weibo] Modernize	2014-04-11 16:02:34 +02:00
Sergey M․	6a4f3528c8	[firstpost] Fix extraction	2014-04-11 20:40:42 +07:00
Philipp Hagemeister	b9c76aa1a9	[youtube] Add support for cleanvideosearch.com (Fixes #2734 )	2014-04-11 13:53:05 +02:00
Philipp Hagemeister	0d3070d364	release 2014.04.11.2	2014-04-11 09:44:33 +02:00
Philipp Hagemeister	7753cadbfa	[comedycentral:shows] Add support for TDS special editions (Fixes #2733 )	2014-04-11 09:30:07 +02:00
Philipp Hagemeister	3950450342	[pyvideo] Fix title	2014-04-11 02:20:50 +02:00
Philipp Hagemeister	c82b1fdad6	[slideshare] Fix description	2014-04-11 02:19:15 +02:00
Philipp Hagemeister	b0fb63abe8	[dailymotion:playlist] Fix title	2014-04-11 02:16:46 +02:00
Philipp Hagemeister	3ab34c603e	[comedycentral] Fix test md5sum	2014-04-11 02:14:31 +02:00
Philipp Hagemeister	7d6413341a	release 2014.04.11.1	2014-04-11 01:29:54 +02:00
Philipp Hagemeister	140012d0f6	release 2014.04.11	2014-04-11 01:28:30 +02:00
Philipp Hagemeister	4be9f8c814	[ninegag] Add support for p/ URLs	2014-04-11 01:25:24 +02:00
Sergey M․	5c802bac37	[byutv] Fix test	2014-04-10 19:37:55 +07:00
Sergey M․	6c30ff756a	[mpora] Fix test	2014-04-10 19:10:03 +07:00
Jaime Marquínez Ferrándiz	62749e4708	[morningstar] Also support 'Cover' (#2729 )	2014-04-09 20:51:28 +02:00
Jaime Marquínez Ferrándiz	6b7dee4b38	[morningstar] Recognize urls that use 'videoCenter' (fixes #2729 )	2014-04-09 20:45:49 +02:00
Sergey M․	ef2041eb4e	[br] Add audio extraction and support more URLs (Closes #2728 )	2014-04-09 20:19:27 +07:00
Philipp Hagemeister	29e3e682af	[comedycentral] Match more URLs Looks like they only offer clips instead of full episodes now. We'll need to add new parsing code as well.	2014-04-09 11:43:15 +02:00
Philipp Hagemeister	f983c44199	Merge pull request #2725 from foolscap/subtitles-error-fix Fix subtitle download error reporting (Fixes #2724)	2014-04-09 10:16:06 +02:00
robbie	e4db19511a	Fix subtitle download error reporting (Fixes #2724 )	2014-04-08 15:59:27 +01:00
Sergey M․	c47d21da80	[ntv] Update test	2014-04-08 19:11:40 +07:00
Philipp Hagemeister	269aecd0c0	[ffmpeg] Do not pass in byets to subprocess (Fixes #2717 )	2014-04-07 23:33:05 +02:00
Philipp Hagemeister	aafddb2b0a	Merge remote-tracking branch 'anisse/fix-content-encoding-charset'	2014-04-07 23:27:03 +02:00
Philipp Hagemeister	6262ac8ac5	release 2014.04.07.4	2014-04-07 23:23:54 +02:00
Philipp Hagemeister	89938c719e	Fix Windows output for non-BMP unicode characters	2014-04-07 23:23:48 +02:00
Anisse Astier	ec0fafbb19	[extractor/common] fallback on utf-8 when charset is not found fixes #2721	2014-04-07 23:10:16 +02:00
Philipp Hagemeister	a5863bdf33	release 2014.04.07.3	2014-04-07 22:48:45 +02:00
Philipp Hagemeister	b58ddb32ba	[utils] Completely rewrite Windows output (Fixes #2672 )	2014-04-07 22:48:13 +02:00
pulpe	784763c565	we don't need to run ffmpeg more times	2014-03-26 15:22:52 +01:00
pulpe	39c68260c0	fix ffmpeg metadatapp	2014-03-26 15:22:52 +01:00
pulpe	149254d0d5	fix ffmpeg error, if youtube-dl runs more than once with --embed-thumbnail with same video	2014-03-26 15:22:52 +01:00
pulpe	0c14e2fbe3	add post processor	2014-03-26 15:22:51 +01:00