release 2014.08.29

[promptfile] Remove quality=1 and leave it to default
[promptfile] Add new extractor
2026-06-12 23:50:12 +00:00 · 2014-08-29 01:07:30 +02:00 · 2014-08-29 01:07:18 +02:00 · 2014-08-29 00:20:10 +03:00 · 2014-08-28 18:03:29 +02:00 · 2014-08-28 22:54:06 +07:00
61 changed files with 1262 additions and 832 deletions
@@ -26,5 +26,6 @@ updates_key.pem
 *.m4a
 *.m4v
 *.part
+*.swp
 test/testdata
 .tox
@@ -288,6 +288,10 @@ which means you can modify it, redistribute it or use it however you like.
                                     postprocessors (default)
    --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
                                     postprocessors
+    --exec CMD                       Execute a command on the file after
+                                     downloading, similar to find's -exec
+                                     syntax. Example: --exec 'adb push {}
+                                     /sdcard/Music/ && rm {}'

 # CONFIGURATION

@@ -102,7 +102,11 @@ def expect_info_dict(self, expected_dict, got_dict):
            match_rex = re.compile(match_str)

            self.assertTrue(
-                isinstance(got, compat_str) and match_rex.match(got),
+                isinstance(got, compat_str),
+                u'Expected a %s object, but got %s for field %s' % (
+                    compat_str.__name__, type(got).__name__, info_field))
+            self.assertTrue(
+                match_rex.match(got),
                u'field %s (value: %r) should match %r' % (info_field, got, match_str))
        elif isinstance(expected, type):
            got = got_dict.get(info_field)
@@ -27,7 +27,6 @@
    "rejecttitle": null, 
    "retries": 10, 
    "simulate": false, 
-    "skip_download": false, 
    "subtitleslang": null, 
    "subtitlesformat": "srt",
    "test": true, 
@@ -109,7 +109,9 @@ class TestAllURLsMatching(unittest.TestCase):
                if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
                    self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
                else:
-                    self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
+                    self.assertFalse(
+                        ie.suitable(url),
+                        '%s should not match URL %r . That URL belongs to %s.' % (type(ie).__name__, url, tc['name']))

    def test_keywords(self):
        self.assertMatch(':ytsubs', ['youtube:subscriptions'])
@@ -7,6 +7,7 @@ import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 from test.helper import (
+    assertGreaterEqual,
    get_params,
    gettestcases,
    expect_info_dict,
@@ -27,6 +28,7 @@ from youtube_dl.utils import (
    compat_HTTPError,
    DownloadError,
    ExtractorError,
+    format_bytes,
    UnavailableVideoError,
 )
 from youtube_dl.extractor import get_info_extractor
@@ -102,8 +104,11 @@ def generator(test_case):
        def get_tc_filename(tc):
            return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))

-        def try_rm_tcs_files():
-            for tc in test_cases:
+        res_dict = None
+        def try_rm_tcs_files(tcs=None):
+            if tcs is None:
+                tcs = test_cases
+            for tc in tcs:
                tc_filename = get_tc_filename(tc)
                try_rm(tc_filename)
                try_rm(tc_filename + '.part')
@@ -136,12 +141,26 @@ def generator(test_case):
                self.assertEqual(res_dict['_type'], 'playlist')
                expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
            if 'playlist_mincount' in test_case:
-                self.assertGreaterEqual(
+                assertGreaterEqual(
+                    self,
                    len(res_dict['entries']),
                    test_case['playlist_mincount'],
                    'Expected at least %d in playlist %s, but got only %d' % (
                        test_case['playlist_mincount'], test_case['url'],
                        len(res_dict['entries'])))
+            if 'playlist_count' in test_case:
+                self.assertEqual(
+                    len(res_dict['entries']),
+                    test_case['playlist_count'],
+                    'Expected %d entries in playlist %s, but got %d.' % (
+                        test_case['playlist_count'],
+                        test_case['url'],
+                        len(res_dict['entries']),
+                    ))
+            if 'playlist_duration_sum' in test_case:
+                got_duration = sum(e['duration'] for e in res_dict['entries'])
+                self.assertEqual(
+                    test_case['playlist_duration_sum'], got_duration)

            for tc in test_cases:
                tc_filename = get_tc_filename(tc)
@@ -153,12 +172,27 @@ def generator(test_case):
                if 'md5' in tc:
                    md5_for_file = _file_md5(tc_filename)
                    self.assertEqual(md5_for_file, tc['md5'])
+                expected_minsize = tc.get('file_minsize', 10000)
+                if expected_minsize is not None:
+                    if params.get('test'):
+                        expected_minsize = max(expected_minsize, 10000)
+                    got_fsize = os.path.getsize(tc_filename)
+                    assertGreaterEqual(
+                        self, got_fsize, expected_minsize,
+                        'Expected %s to be at least %s, but it\'s only %s ' %
+                        (tc_filename, format_bytes(expected_minsize),
+                            format_bytes(got_fsize)))
                with io.open(info_json_fn, encoding='utf-8') as infof:
                    info_dict = json.load(infof)

                expect_info_dict(self, tc.get('info_dict', {}), info_dict)
        finally:
            try_rm_tcs_files()
+            if is_playlist and res_dict is not None:
+                # Remove all other files that may have been extracted if the
+                # extractor returns full results even with extract_flat
+                res_tcs = [{'info_dict': e} for e in res_dict['entries']]
+                try_rm_tcs_files(res_tcs)

    return test_template

@@ -1,413 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-## DEPRECATED FILE!
-# Add new tests to the extractors themselves, like this:
-# _TEST = {
-#    'url': 'http://example.com/playlist/42',
-#    'playlist_mincount': 99,
-#    'info_dict': {
-#        'id': '42',
-#        'title': 'Playlist number forty-two',
-#    }
-# }
-
-from __future__ import unicode_literals
-
-# Allow direct execution
-import os
-import sys
-import unittest
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from test.helper import (
-    assertRegexpMatches,
-    assertGreaterEqual,
-    expect_info_dict,
-    FakeYDL,
-)
-
-from youtube_dl.extractor import (
-    AcademicEarthCourseIE,
-    DailymotionPlaylistIE,
-    DailymotionUserIE,
-    VimeoChannelIE,
-    VimeoUserIE,
-    VimeoAlbumIE,
-    VimeoGroupsIE,
-    VineUserIE,
-    UstreamChannelIE,
-    SoundcloudSetIE,
-    SoundcloudUserIE,
-    SoundcloudPlaylistIE,
-    TeacherTubeUserIE,
-    LivestreamIE,
-    LivestreamOriginalIE,
-    NHLVideocenterIE,
-    BambuserChannelIE,
-    BandcampAlbumIE,
-    SmotriCommunityIE,
-    SmotriUserIE,
-    IviCompilationIE,
-    ImdbListIE,
-    KhanAcademyIE,
-    EveryonesMixtapeIE,
-    RutubeChannelIE,
-    RutubePersonIE,
-    GoogleSearchIE,
-    GenericIE,
-    TEDIE,
-    ToypicsUserIE,
-    XTubeUserIE,
-    InstagramUserIE,
-    CSpanIE,
-    AolIE,
-    GameOnePlaylistIE,
-)
-
-
-class TestPlaylists(unittest.TestCase):
-    def assertIsPlaylist(self, info):
-        """Make sure the info has '_type' set to 'playlist'"""
-        self.assertEqual(info['_type'], 'playlist')
-
-    def test_dailymotion_playlist(self):
-        dl = FakeYDL()
-        ie = DailymotionPlaylistIE(dl)
-        result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'SPORT')
-        self.assertTrue(len(result['entries']) > 20)
-
-    def test_dailymotion_user(self):
-        dl = FakeYDL()
-        ie = DailymotionUserIE(dl)
-        result = ie.extract('https://www.dailymotion.com/user/nqtv')
-        self.assertIsPlaylist(result)
-        assertGreaterEqual(self, len(result['entries']), 100)
-        self.assertEqual(result['title'], 'Rémi Gaillard')
-
-    def test_vimeo_channel(self):
-        dl = FakeYDL()
-        ie = VimeoChannelIE(dl)
-        result = ie.extract('http://vimeo.com/channels/tributes')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'Vimeo Tributes')
-        self.assertTrue(len(result['entries']) > 24)
-
-    def test_vimeo_user(self):
-        dl = FakeYDL()
-        ie = VimeoUserIE(dl)
-        result = ie.extract('http://vimeo.com/nkistudio/videos')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'Nki')
-        self.assertTrue(len(result['entries']) > 65)
-
-    def test_vimeo_album(self):
-        dl = FakeYDL()
-        ie = VimeoAlbumIE(dl)
-        result = ie.extract('http://vimeo.com/album/2632481')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'Staff Favorites: November 2013')
-        self.assertTrue(len(result['entries']) > 12)
-
-    def test_vimeo_groups(self):
-        dl = FakeYDL()
-        ie = VimeoGroupsIE(dl)
-        result = ie.extract('http://vimeo.com/groups/rolexawards')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'Rolex Awards for Enterprise')
-        self.assertTrue(len(result['entries']) > 72)
-
-    def test_vine_user(self):
-        dl = FakeYDL()
-        ie = VineUserIE(dl)
-        result = ie.extract('https://vine.co/Visa')
-        self.assertIsPlaylist(result)
-        assertGreaterEqual(self, len(result['entries']), 47)
-
-    def test_ustream_channel(self):
-        dl = FakeYDL()
-        ie = UstreamChannelIE(dl)
-        result = ie.extract('http://www.ustream.tv/channel/channeljapan')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '10874166')
-        assertGreaterEqual(self, len(result['entries']), 54)
-
-    def test_soundcloud_set(self):
-        dl = FakeYDL()
-        ie = SoundcloudSetIE(dl)
-        result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'The Royal Concept EP')
-        assertGreaterEqual(self, len(result['entries']), 6)
-
-    def test_soundcloud_user(self):
-        dl = FakeYDL()
-        ie = SoundcloudUserIE(dl)
-        result = ie.extract('https://soundcloud.com/the-concept-band')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '9615865')
-        assertGreaterEqual(self, len(result['entries']), 12)
-
-    def test_soundcloud_likes(self):
-        dl = FakeYDL()
-        ie = SoundcloudUserIE(dl)
-        result = ie.extract('https://soundcloud.com/the-concept-band/likes')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '9615865')
-        assertGreaterEqual(self, len(result['entries']), 1)
-
-    def test_soundcloud_playlist(self):
-        dl = FakeYDL()
-        ie = SoundcloudPlaylistIE(dl)
-        result = ie.extract('http://api.soundcloud.com/playlists/4110309')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '4110309')
-        self.assertEqual(result['title'], 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]')
-        assertRegexpMatches(
-            self, result['description'], r'.*?TILT Brass - Bowery Poetry Club')
-        self.assertEqual(len(result['entries']), 6)
-
-    def test_livestream_event(self):
-        dl = FakeYDL()
-        ie = LivestreamIE(dl)
-        result = ie.extract('http://new.livestream.com/tedx/cityenglish')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'TEDCity2.0 (English)')
-        assertGreaterEqual(self, len(result['entries']), 4)
-
-    def test_livestreamoriginal_folder(self):
-        dl = FakeYDL()
-        ie = LivestreamOriginalIE(dl)
-        result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3')
-        assertGreaterEqual(self, len(result['entries']), 28)
-
-    def test_nhl_videocenter(self):
-        dl = FakeYDL()
-        ie = NHLVideocenterIE(dl)
-        result = ie.extract('http://video.canucks.nhl.com/videocenter/console?catid=999')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '999')
-        self.assertEqual(result['title'], 'Highlights')
-        self.assertEqual(len(result['entries']), 12)
-
-    def test_bambuser_channel(self):
-        dl = FakeYDL()
-        ie = BambuserChannelIE(dl)
-        result = ie.extract('http://bambuser.com/channel/pixelversity')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'pixelversity')
-        assertGreaterEqual(self, len(result['entries']), 60)
-
-    def test_bandcamp_album(self):
-        dl = FakeYDL()
-        ie = BandcampAlbumIE(dl)
-        result = ie.extract('http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['title'], 'Hierophany of the Open Grave')
-        assertGreaterEqual(self, len(result['entries']), 9)
-        
-    def test_smotri_community(self):
-        dl = FakeYDL()
-        ie = SmotriCommunityIE(dl)
-        result = ie.extract('http://smotri.com/community/video/kommuna')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'kommuna')
-        self.assertEqual(result['title'], 'КПРФ')
-        assertGreaterEqual(self, len(result['entries']), 4)
-        
-    def test_smotri_user(self):
-        dl = FakeYDL()
-        ie = SmotriUserIE(dl)
-        result = ie.extract('http://smotri.com/user/inspector')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'inspector')
-        self.assertEqual(result['title'], 'Inspector')
-        assertGreaterEqual(self, len(result['entries']), 9)
-
-    def test_AcademicEarthCourse(self):
-        dl = FakeYDL()
-        ie = AcademicEarthCourseIE(dl)
-        result = ie.extract('http://academicearth.org/playlists/laws-of-nature/')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'laws-of-nature')
-        self.assertEqual(result['title'], 'Laws of Nature')
-        self.assertEqual(result['description'],u'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.')# u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
-        self.assertEqual(len(result['entries']), 4)
-        
-    def test_ivi_compilation(self):
-        dl = FakeYDL()
-        ie = IviCompilationIE(dl)
-        result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'dvoe_iz_lartsa')
-        self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)')
-        assertGreaterEqual(self, len(result['entries']), 24)
-
-    def test_ivi_compilation_season(self):
-        dl = FakeYDL()
-        ie = IviCompilationIE(dl)
-        result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa/season1')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1')
-        self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон')
-        assertGreaterEqual(self, len(result['entries']), 12)
-        
-    def test_imdb_list(self):
-        dl = FakeYDL()
-        ie = ImdbListIE(dl)
-        result = ie.extract('http://www.imdb.com/list/JFs9NWw6XI0')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'JFs9NWw6XI0')
-        self.assertEqual(result['title'], 'March 23, 2012 Releases')
-        self.assertEqual(len(result['entries']), 7)
-
-    def test_khanacademy_topic(self):
-        dl = FakeYDL()
-        ie = KhanAcademyIE(dl)
-        result = ie.extract('https://www.khanacademy.org/math/applied-math/cryptography')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'cryptography')
-        self.assertEqual(result['title'], 'Journey into cryptography')
-        self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?')
-        assertGreaterEqual(self, len(result['entries']), 3)
-
-    def test_EveryonesMixtape(self):
-        dl = FakeYDL()
-        ie = EveryonesMixtapeIE(dl)
-        result = ie.extract('http://everyonesmixtape.com/#/mix/m7m0jJAbMQi')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'm7m0jJAbMQi')
-        self.assertEqual(result['title'], 'Driving')
-        self.assertEqual(len(result['entries']), 24)
-        
-    def test_rutube_channel(self):
-        dl = FakeYDL()
-        ie = RutubeChannelIE(dl)
-        result = ie.extract('http://rutube.ru/tags/video/1800/')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '1800')
-        assertGreaterEqual(self, len(result['entries']), 68)
-
-    def test_rutube_person(self):
-        dl = FakeYDL()
-        ie = RutubePersonIE(dl)
-        result = ie.extract('http://rutube.ru/video/person/313878/')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '313878')
-        assertGreaterEqual(self, len(result['entries']), 37)
-
-    def test_multiple_brightcove_videos(self):
-        # https://github.com/rg3/youtube-dl/issues/2283
-        dl = FakeYDL()
-        ie = GenericIE(dl)
-        result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'always-never-nuclear-command-and-control')
-        self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
-        self.assertEqual(len(result['entries']), 3)
-
-    def test_GoogleSearch(self):
-        dl = FakeYDL()
-        ie = GoogleSearchIE(dl)
-        result = ie.extract('gvsearch15:python language')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'python language')
-        self.assertEqual(result['title'], 'python language')
-        self.assertEqual(len(result['entries']), 15)
-
-    def test_generic_rss_feed(self):
-        dl = FakeYDL()
-        ie = GenericIE(dl)
-        result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml')
-        self.assertEqual(result['title'], 'Zero Punctuation')
-        self.assertTrue(len(result['entries']) > 10)
-
-    def test_ted_playlist(self):
-        dl = FakeYDL()
-        ie = TEDIE(dl)
-        result = ie.extract('http://www.ted.com/playlists/who_are_the_hackers')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '10')
-        self.assertEqual(result['title'], 'Who are the hackers?')
-        assertGreaterEqual(self, len(result['entries']), 6)
-
-    def test_toypics_user(self):
-        dl = FakeYDL()
-        ie = ToypicsUserIE(dl)
-        result = ie.extract('http://videos.toypics.net/Mikey')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'Mikey')
-        assertGreaterEqual(self, len(result['entries']), 17)
-
-    def test_xtube_user(self):
-        dl = FakeYDL()
-        ie = XTubeUserIE(dl)
-        result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'greenshowers')
-        assertGreaterEqual(self, len(result['entries']), 155)
-
-    def test_InstagramUser(self):
-        dl = FakeYDL()
-        ie = InstagramUserIE(dl)
-        result = ie.extract('http://instagram.com/porsche')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'porsche')
-        assertGreaterEqual(self, len(result['entries']), 2)
-        test_video = next(
-            e for e in result['entries']
-            if e['id'] == '614605558512799803_462752227')
-        dl.add_default_extra_info(test_video, ie, '(irrelevant URL)')
-        dl.process_video_result(test_video, download=False)
-        EXPECTED = {
-            'id': '614605558512799803_462752227',
-            'ext': 'mp4',
-            'title': '#Porsche Intelligent Performance.',
-            'thumbnail': 're:^https?://.*\.jpg',
-            'uploader': 'Porsche',
-            'uploader_id': 'porsche',
-            'timestamp': 1387486713,
-            'upload_date': '20131219',
-        }
-        expect_info_dict(self, EXPECTED, test_video)
-
-    def test_CSpan_playlist(self):
-        dl = FakeYDL()
-        ie = CSpanIE(dl)
-        result = ie.extract(
-            'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '342759')
-        self.assertEqual(
-            result['title'], 'General Motors Ignition Switch Recall')
-        whole_duration = sum(e['duration'] for e in result['entries'])
-        self.assertEqual(whole_duration, 14855)
-
-    def test_aol_playlist(self):
-        dl = FakeYDL()
-        ie = AolIE(dl)
-        result = ie.extract(
-            'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], '152147')
-        self.assertEqual(
-            result['title'], 'Brace Yourself - Today\'s Weirdest News')
-        assertGreaterEqual(self, len(result['entries']), 10)
-
-    def test_TeacherTubeUser(self):
-        dl = FakeYDL()
-        ie = TeacherTubeUserIE(dl)
-        result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2')
-        self.assertIsPlaylist(result)
-        self.assertEqual(result['id'], 'rbhagwati2')
-        assertGreaterEqual(self, len(result['entries']), 179)
-
-
-if __name__ == '__main__':
-    unittest.main()
@@ -1,6 +1,8 @@
 #!/usr/bin/env python
 # coding: utf-8

+from __future__ import unicode_literals
+
 # Allow direct execution
 import os
 import sys
@@ -13,7 +15,6 @@ import io
 import json
 import xml.etree.ElementTree

-#from youtube_dl.utils import htmlentity_transform
 from youtube_dl.utils import (
    DateRange,
    encodeFilename,
@@ -41,11 +42,6 @@ from youtube_dl.utils import (
    uppercase_escape,
 )

-if sys.version_info < (3, 0):
-    _compat_str = lambda b: b.decode('unicode-escape')
-else:
-    _compat_str = lambda s: s
-

 class TestUtil(unittest.TestCase):
    def test_timeconvert(self):
@@ -67,9 +63,9 @@ class TestUtil(unittest.TestCase):
        self.assertEqual('this - that', sanitize_filename('this: that'))

        self.assertEqual(sanitize_filename('AT&T'), 'AT&T')
-        aumlaut = _compat_str('\xe4')
+        aumlaut = 'ä'
        self.assertEqual(sanitize_filename(aumlaut), aumlaut)
-        tests = _compat_str('\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430')
+        tests = '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430'
        self.assertEqual(sanitize_filename(tests), tests)

        forbidden = '"\0\\/'
@@ -91,9 +87,9 @@ class TestUtil(unittest.TestCase):
        self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True))
        self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True))

-        tests = _compat_str('a\xe4b\u4e2d\u56fd\u7684c')
+        tests = 'a\xe4b\u4e2d\u56fd\u7684c'
        self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c')
-        self.assertTrue(sanitize_filename(_compat_str('\xf6'), restricted=True) != '')  # No empty filename
+        self.assertTrue(sanitize_filename('\xf6', restricted=True) != '')  # No empty filename

        forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
        for fc in forbidden:
@@ -101,8 +97,8 @@ class TestUtil(unittest.TestCase):
                self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))

        # Handle a common case more neatly
-        self.assertEqual(sanitize_filename(_compat_str('\u5927\u58f0\u5e26 - Song'), restricted=True), 'Song')
-        self.assertEqual(sanitize_filename(_compat_str('\u603b\u7edf: Speech'), restricted=True), 'Speech')
+        self.assertEqual(sanitize_filename('\u5927\u58f0\u5e26 - Song', restricted=True), 'Song')
+        self.assertEqual(sanitize_filename('\u603b\u7edf: Speech', restricted=True), 'Speech')
        # .. but make sure the file name is never empty
        self.assertTrue(sanitize_filename('-', restricted=True) != '')
        self.assertTrue(sanitize_filename(':', restricted=True) != '')
@@ -120,7 +116,9 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1])

    def test_unescape_html(self):
-        self.assertEqual(unescapeHTML(_compat_str('%20;')), _compat_str('%20;'))
+        self.assertEqual(unescapeHTML('%20;'), '%20;')
+        self.assertEqual(
+            unescapeHTML('&eacute;'), 'é')
        
    def test_daterange(self):
        _20century = DateRange("19000101","20000101")
@@ -138,7 +136,7 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(unified_strdate('1968-12-10'), '19681210')

    def test_find_xpath_attr(self):
-        testxml = u'''<root>
+        testxml = '''<root>
            <node/>
            <node x="a"/>
            <node x="a" y="c" />
@@ -151,18 +149,18 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])

    def test_meta_parser(self):
-        testhtml = u'''
+        testhtml = '''
        <head>
            <meta name="description" content="foo &amp; bar">
            <meta content='Plato' name='author'/>
        </head>
        '''
        get_meta = lambda name: get_meta_content(name, testhtml)
-        self.assertEqual(get_meta('description'), u'foo & bar')
+        self.assertEqual(get_meta('description'), 'foo & bar')
        self.assertEqual(get_meta('author'), 'Plato')

    def test_xpath_with_ns(self):
-        testxml = u'''<root xmlns:media="http://example.com/">
+        testxml = '''<root xmlns:media="http://example.com/">
            <media:song>
                <media:author>The Author</media:author>
                <url>http://server.com/download.mp3</url>
@@ -171,8 +169,8 @@ class TestUtil(unittest.TestCase):
        doc = xml.etree.ElementTree.fromstring(testxml)
        find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'}))
        self.assertTrue(find('media:song') is not None)
-        self.assertEqual(find('media:song/media:author').text, u'The Author')
-        self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3')
+        self.assertEqual(find('media:song/media:author').text, 'The Author')
+        self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')

    def test_smuggle_url(self):
        data = {u"ö": u"ö", u"abc": [3]}
@@ -187,22 +185,22 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(res_data, None)

    def test_shell_quote(self):
-        args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')]
-        self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""")
+        args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
+        self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")

    def test_str_to_int(self):
        self.assertEqual(str_to_int('123,456'), 123456)
        self.assertEqual(str_to_int('123.456'), 123456)

    def test_url_basename(self):
-        self.assertEqual(url_basename(u'http://foo.de/'), u'')
-        self.assertEqual(url_basename(u'http://foo.de/bar/baz'), u'baz')
-        self.assertEqual(url_basename(u'http://foo.de/bar/baz?x=y'), u'baz')
-        self.assertEqual(url_basename(u'http://foo.de/bar/baz#x=y'), u'baz')
-        self.assertEqual(url_basename(u'http://foo.de/bar/baz/'), u'baz')
+        self.assertEqual(url_basename('http://foo.de/'), '')
+        self.assertEqual(url_basename('http://foo.de/bar/baz'), 'baz')
+        self.assertEqual(url_basename('http://foo.de/bar/baz?x=y'), 'baz')
+        self.assertEqual(url_basename('http://foo.de/bar/baz#x=y'), 'baz')
+        self.assertEqual(url_basename('http://foo.de/bar/baz/'), 'baz')
        self.assertEqual(
-            url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'),
-            u'trailer.mp4')
+            url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
+            'trailer.mp4')

    def test_parse_duration(self):
        self.assertEqual(parse_duration(None), None)
@@ -219,6 +217,7 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(parse_duration('0h0m0s'), 0)
        self.assertEqual(parse_duration('0m0s'), 0)
        self.assertEqual(parse_duration('0s'), 0)
+        self.assertEqual(parse_duration('01:02:03.05'), 3723.05)

    def test_fix_xml_ampersands(self):
        self.assertEqual(
@@ -255,16 +254,16 @@ class TestUtil(unittest.TestCase):
        testPL(5, 2, (20, 99), [])

    def test_struct_unpack(self):
-        self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,))
+        self.assertEqual(struct_unpack('!B', b'\x00'), (0,))

    def test_read_batch_urls(self):
-        f = io.StringIO(u'''\xef\xbb\xbf foo
+        f = io.StringIO('''\xef\xbb\xbf foo
            bar\r
            baz
            # More after this line\r
            ; or after this
            bam''')
-        self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
+        self.assertEqual(read_batch_urls(f), ['foo', 'bar', 'baz', 'bam'])

    def test_urlencode_postdata(self):
        data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
@@ -281,8 +280,8 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(d, [{"id": "532cb", "x": 3}])

    def test_uppercase_escape(self):
-        self.assertEqual(uppercase_escape(u'aä'), u'aä')
-        self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')
+        self.assertEqual(uppercase_escape('aä'), 'aä')
+        self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')

 if __name__ == '__main__':
    unittest.main()
@@ -172,6 +172,7 @@ class YoutubeDL(object):
    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
+    exec_cmd:          Arbitrary command to run after downloading
    """

    params = None
@@ -424,7 +425,7 @@ class YoutubeDL(object):
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
-                template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
+                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
@@ -636,6 +637,7 @@ class YoutubeDL(object):
            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                extra = {
+                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
@@ -73,6 +73,7 @@ __authors__  = (
    'Erik Johnson',
    'Keith Beckman',
    'Ole Ernst',
+    'Aaron McDaniel (mcd1992)',
 )

 __license__ = 'Public Domain'
@@ -119,6 +120,7 @@ from .postprocessor import (
    FFmpegExtractAudioPP,
    FFmpegEmbedSubtitlePP,
    XAttrMetadataPP,
+    ExecAfterDownloadPP,
 )


@@ -550,7 +552,9 @@ def parseOpts(overrideArguments=None):
        help='Prefer avconv over ffmpeg for running the postprocessors (default)')
    postproc.add_option('--prefer-ffmpeg', action='store_true', dest='prefer_ffmpeg',
        help='Prefer ffmpeg over avconv for running the postprocessors')
-
+    postproc.add_option(
+        '--exec', metavar='CMD', dest='exec_cmd',
+        help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'' )

    parser.add_option_group(general)
    parser.add_option_group(selection)
@@ -831,6 +835,7 @@ def _real_main(argv=None):
        'default_search': opts.default_search,
        'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
        'encoding': opts.encoding,
+        'exec_cmd': opts.exec_cmd,
    }

    with YoutubeDL(ydl_opts) as ydl:
@@ -854,6 +859,13 @@ def _real_main(argv=None):
                ydl.add_post_processor(FFmpegAudioFixPP())
            ydl.add_post_processor(AtomicParsleyPP())

+
+        # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
+        # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
+        if opts.exec_cmd:
+            ydl.add_post_processor(ExecAfterDownloadPP(
+                verboseOutput=opts.verbose, exec_cmd=opts.exec_cmd))
+
        # Update version
        if opts.update_self:
            update_self(ydl.to_screen, opts.verbose)
@@ -3,6 +3,7 @@ import subprocess

 from .common import FileDownloader
 from ..utils import (
+    check_executable,
    encodeFilename,
 )

@@ -19,13 +20,11 @@ class HlsFD(FileDownloader):
            encodeFilename(tmpfilename, for_subprocess=True)]

        for program in ['avconv', 'ffmpeg']:
-            try:
-                subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
+            if check_executable(program, ['-version']):
                break
-            except (OSError, IOError):
-                pass
        else:
            self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
+            return False
        cmd = [program] + args

        retval = subprocess.call(cmd)
@@ -42,5 +41,5 @@ class HlsFD(FileDownloader):
            return True
        else:
            self.to_stderr(u"\n")
-            self.report_error(u'ffmpeg exited with code %d' % retval)
+            self.report_error(u'%s exited with code %d' % (program, retval))
            return False
@@ -8,9 +8,10 @@ import time

 from .common import FileDownloader
 from ..utils import (
+    check_executable,
+    compat_str,
    encodeFilename,
    format_bytes,
-    compat_str,
 )


@@ -103,9 +104,7 @@ class RtmpFD(FileDownloader):
        test = self.params.get('test', False)

        # Check for rtmpdump first
-        try:
-            subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
-        except (OSError, IOError):
+        if not check_executable('rtmpdump', ['-h']):
            self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')
            return False

@@ -9,7 +9,7 @@ from .allocine import AllocineIE
 from .aparat import AparatIE
 from .appletrailers import AppleTrailersIE
 from .archiveorg import ArchiveOrgIE
-from .ard import ARDIE
+from .ard import ARDIE, ARDMediathekIE
 from .arte import (
    ArteTvIE,
    ArteTVPlus7IE,
@@ -209,6 +209,7 @@ from .mtv import (
    MTVIggyIE,
 )
 from .musicplayon import MusicPlayOnIE
+from .musicvault import MusicVaultIE
 from .muzu import MuzuTVIE
 from .myspace import MySpaceIE
 from .myspass import MySpassIE
@@ -256,6 +257,7 @@ from .podomatic import PodomaticIE
 from .pornhd import PornHdIE
 from .pornhub import PornHubIE
 from .pornotube import PornotubeIE
+from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .pyvideo import PyvideoIE
 from .radiofrance import RadioFranceIE
@@ -315,6 +317,7 @@ from .spankwire import SpankwireIE
 from .spiegel import SpiegelIE
 from .spiegeltv import SpiegeltvIE
 from .spike import SpikeIE
+from .sportdeutschland import SportDeutschlandIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamcloud import StreamcloudIE
@@ -7,6 +7,15 @@ from .common import InfoExtractor
 class AcademicEarthCourseIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
    IE_NAME = 'AcademicEarth:Course'
+    _TEST = {
+        'url': 'http://academicearth.org/playlists/laws-of-nature/',
+        'info_dict': {
+            'id': 'laws-of-nature',
+            'title': 'Laws of Nature',
+            'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
+        },
+        'playlist_count': 4,
+    }

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
@@ -21,7 +21,7 @@ class AolIE(InfoExtractor):
        (?:$|\?)
    '''

-    _TEST = {
+    _TESTS = [{
        'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
        'md5': '18ef68f48740e86ae94b98da815eec42',
        'info_dict': {
@@ -30,7 +30,14 @@ class AolIE(InfoExtractor):
            'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
        },
        'add_ie': ['FiveMin'],
-    }
+    }, {
+        'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316',
+        'info_dict': {
+            'id': '152147',
+            'title': 'Brace Yourself - Today\'s Weirdest News',
+        },
+        'playlist_mincount': 10,
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -10,10 +10,14 @@ from ..utils import (
    qualities,
    compat_urllib_parse_urlparse,
    compat_urllib_parse,
+    int_or_none,
+    parse_duration,
+    unified_strdate,
 )


-class ARDIE(InfoExtractor):
+class ARDMediathekIE(InfoExtractor):
+    IE_NAME = 'ARD:mediathek'
    _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'

    _TESTS = [{
@@ -128,3 +132,60 @@ class ARDIE(InfoExtractor):
            'formats': formats,
            'thumbnail': thumbnail,
        }
+
+
+class ARDIE(InfoExtractor):
+    """Extractor for video pages on daserste.de.
+
+    Metadata and formats are read from the '~playerXml.xml' document whose
+    URL is the page URL without its '.html' suffix (the 'mainurl' group).
+    """
+    # Raw string: '\.' is a regex escape, not a string escape.
+    _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
+    _TEST = {
+        'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
+        'md5': 'd216c3a86493f9322545e045ddc3eb35',
+        'info_dict': {
+            'display_id': 'die-story-im-ersten-mission-unter-falscher-flagge',
+            'id': '100',
+            'ext': 'mp4',
+            'duration': 2600,
+            'title': 'Die Story im Ersten: Mission unter falscher Flagge',
+            'upload_date': '20140804',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the video page at *url*.
+
+        Downloads the player XML, reads title, duration, broadcast date and
+        thumbnail from its <video> node and builds one format per <asset>.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('display_id')
+
+        player_url = mobj.group('mainurl') + '~playerXml.xml'
+        doc = self._download_xml(player_url, display_id)
+        video_node = doc.find('./video')
+        upload_date = unified_strdate(video_node.find('./broadcastDate').text)
+        thumbnail = video_node.find('.//teaserImage//variant/url').text
+
+        formats = []
+        for a in video_node.findall('.//asset'):
+            f = {
+                'format_id': a.attrib['type'],
+                'width': int_or_none(a.find('./frameWidth').text),
+                'height': int_or_none(a.find('./frameHeight').text),
+                'vbr': int_or_none(a.find('./bitrateVideo').text),
+                'abr': int_or_none(a.find('./bitrateAudio').text),
+                'vcodec': a.find('./codecVideo').text,
+                'tbr': int_or_none(a.find('./totalBitrate').text),
+            }
+            # A non-empty serverPrefix is used as the URL with fileName as
+            # the playpath; otherwise fileName is the URL itself.
+            server_prefix = a.find('./serverPrefix').text
+            file_name = a.find('./fileName').text
+            if server_prefix:
+                f['url'] = server_prefix
+                f['playpath'] = file_name
+            else:
+                f['url'] = file_name
+            formats.append(f)
+        self._sort_formats(formats)
+
+        return {
+            'id': mobj.group('id'),
+            'formats': formats,
+            'display_id': display_id,
+            'title': video_node.find('./title').text,
+            'duration': parse_duration(video_node.find('./duration').text),
+            'upload_date': upload_date,
+            'thumbnail': thumbnail,
+        }
+
@@ -59,6 +59,13 @@ class BambuserChannelIE(InfoExtractor):
    _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
    # The maximum number we can get with each request
    _STEP = 50
+    _TEST = {
+        'url': 'http://bambuser.com/channel/pixelversity',
+        'info_dict': {
+            'title': 'pixelversity',
+        },
+        'playlist_mincount': 60,
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -73,10 +80,10 @@ class BambuserChannelIE(InfoExtractor):
            req = compat_urllib_request.Request(req_url)
            # Without setting this header, we wouldn't get any result
            req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
-            info_json = self._download_webpage(req, user,
-                'Downloading page %d' % i)
-            results = json.loads(info_json)['result']
-            if len(results) == 0:
+            data = self._download_json(
+                req, user, 'Downloading page %d' % i)
+            results = data['result']
+            if not results:
                break
            last_id = results[-1]['vid']
            urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
@@ -96,7 +96,7 @@ class BandcampAlbumIE(InfoExtractor):
    IE_NAME = 'Bandcamp:album'
    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))'

-    _TEST = {
+    _TESTS = [{
        'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
        'playlist': [
            {
@@ -118,7 +118,13 @@ class BandcampAlbumIE(InfoExtractor):
            'playlistend': 2
        },
        'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
-    }
+    }, {
+        'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
+        'info_dict': {
+            'title': 'Hierophany of the Open Grave',
+        },
+        'playlist_mincount': 9,
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -15,7 +15,7 @@ from ..utils import (


 class BlipTVIE(SubtitlesInfoExtractor):
-    _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_TESTS]+)))'
+    _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'

    _TESTS = [
        {
@@ -25,7 +25,7 @@ class CBSIE(InfoExtractor):
    }, {
        'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/',
        'info_dict': {
-            'id': 'P9gjWjelt6iP',
+            'id': 'WWF_5KqY3PK1',
            'ext': 'flv',
            'title': 'Live on Letterman - St. Vincent',
            'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.',
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import base64
 import hashlib
 import json
@@ -114,7 +116,7 @@ class InfoExtractor(object):
    upload_date:    Video upload date (YYYYMMDD).
                    If not explicitly set, calculated from timestamp.
    uploader_id:    Nickname or id of the video uploader.
-    location:       Physical location of the video.
+    location:       Physical location where the video was filmed.
    subtitles:      The subtitle file contents as a dictionary in the format
                    {language: subtitles}.
    duration:       Length of the video in seconds, as an integer.
@@ -202,17 +204,17 @@ class InfoExtractor(object):
            self.report_download_webpage(video_id)
        elif note is not False:
            if video_id is None:
-                self.to_screen(u'%s' % (note,))
+                self.to_screen('%s' % (note,))
            else:
-                self.to_screen(u'%s: %s' % (video_id, note))
+                self.to_screen('%s: %s' % (video_id, note))
        try:
            return self._downloader.urlopen(url_or_request)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            if errnote is False:
                return False
            if errnote is None:
-                errnote = u'Unable to download webpage'
-            errmsg = u'%s: %s' % (errnote, compat_str(err))
+                errnote = 'Unable to download webpage'
+            errmsg = '%s: %s' % (errnote, compat_str(err))
            if fatal:
                raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
            else:
@@ -249,7 +251,7 @@ class InfoExtractor(object):
                url = url_or_request.get_full_url()
            except AttributeError:
                url = url_or_request
-            self.to_screen(u'Dumping request to ' + url)
+            self.to_screen('Dumping request to ' + url)
            dump = base64.b64encode(webpage_bytes).decode('ascii')
            self._downloader.to_screen(dump)
        if self._downloader.params.get('write_pages', False):
@@ -259,11 +261,11 @@ class InfoExtractor(object):
                url = url_or_request
            basen = '%s_%s' % (video_id, url)
            if len(basen) > 240:
-                h = u'___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
+                h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
                basen = basen[:240 - len(h)] + h
            raw_filename = basen + '.dump'
            filename = sanitize_filename(raw_filename, restricted=True)
-            self.to_screen(u'Saving request to ' + filename)
+            self.to_screen('Saving request to ' + filename)
            with open(filename, 'wb') as outf:
                outf.write(webpage_bytes)

@@ -272,14 +274,14 @@ class InfoExtractor(object):
        except LookupError:
            content = webpage_bytes.decode('utf-8', 'replace')

-        if (u'<title>Access to this site is blocked</title>' in content and
-                u'Websense' in content[:512]):
-            msg = u'Access to this webpage has been blocked by Websense filtering software in your network.'
+        if ('<title>Access to this site is blocked</title>' in content and
+                'Websense' in content[:512]):
+            msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
            blocked_iframe = self._html_search_regex(
                r'<iframe src="([^"]+)"', content,
-                u'Websense information URL', default=None)
+                'Websense information URL', default=None)
            if blocked_iframe:
-                msg += u' Visit %s for more details' % blocked_iframe
+                msg += ' Visit %s for more details' % blocked_iframe
            raise ExtractorError(msg, expected=True)

        return (content, urlh)
@@ -294,7 +296,7 @@ class InfoExtractor(object):
            return content

    def _download_xml(self, url_or_request, video_id,
-                      note=u'Downloading XML', errnote=u'Unable to download XML',
+                      note='Downloading XML', errnote='Unable to download XML',
                      transform_source=None, fatal=True):
        """Return the xml as an xml.etree.ElementTree.Element"""
        xml_string = self._download_webpage(
@@ -306,8 +308,8 @@ class InfoExtractor(object):
        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))

    def _download_json(self, url_or_request, video_id,
-                       note=u'Downloading JSON metadata',
-                       errnote=u'Unable to download JSON metadata',
+                       note='Downloading JSON metadata',
+                       errnote='Unable to download JSON metadata',
                       transform_source=None,
                       fatal=True):
        json_string = self._download_webpage(
@@ -322,29 +324,29 @@ class InfoExtractor(object):
            raise ExtractorError('Failed to download JSON', cause=ve)

    def report_warning(self, msg, video_id=None):
-        idstr = u'' if video_id is None else u'%s: ' % video_id
+        idstr = '' if video_id is None else '%s: ' % video_id
        self._downloader.report_warning(
-            u'[%s] %s%s' % (self.IE_NAME, idstr, msg))
+            '[%s] %s%s' % (self.IE_NAME, idstr, msg))

    def to_screen(self, msg):
        """Print msg to screen, prefixing it with '[ie_name]'"""
-        self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
+        self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg))

    def report_extraction(self, id_or_name):
        """Report information extraction."""
-        self.to_screen(u'%s: Extracting information' % id_or_name)
+        self.to_screen('%s: Extracting information' % id_or_name)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
-        self.to_screen(u'%s: Downloading webpage' % video_id)
+        self.to_screen('%s: Downloading webpage' % video_id)

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
-        self.to_screen(u'Confirming age')
+        self.to_screen('Confirming age')

    def report_login(self):
        """Report attempt to log in."""
-        self.to_screen(u'Logging in')
+        self.to_screen('Logging in')

    #Methods for following #608
    @staticmethod
@@ -384,7 +386,7 @@ class InfoExtractor(object):
                    break

        if os.name != 'nt' and sys.stderr.isatty():
-            _name = u'\033[0;34m%s\033[0m' % name
+            _name = '\033[0;34m%s\033[0m' % name
        else:
            _name = name

@@ -394,10 +396,10 @@ class InfoExtractor(object):
        elif default is not _NO_DEFAULT:
            return default
        elif fatal:
-            raise RegexNotFoundError(u'Unable to extract %s' % _name)
+            raise RegexNotFoundError('Unable to extract %s' % _name)
        else:
-            self._downloader.report_warning(u'unable to extract %s; '
-                u'please report this issue on http://yt-dl.org/bug' % _name)
+            self._downloader.report_warning('unable to extract %s; '
+                'please report this issue on http://yt-dl.org/bug' % _name)
            return None

    def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
@@ -436,7 +438,7 @@ class InfoExtractor(object):
                else:
                    raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
            except (IOError, netrc.NetrcParseError) as err:
-                self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
+                self._downloader.report_warning('parsing .netrc: %s' % compat_str(err))
        
        return (username, password)

@@ -476,7 +478,7 @@ class InfoExtractor(object):
        return unescapeHTML(escaped)

    def _og_search_thumbnail(self, html, **kargs):
-        return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs)
+        return self._og_search_property('image', html, 'thumbnail url', fatal=False, **kargs)

    def _og_search_description(self, html, **kargs):
        return self._og_search_property('description', html, fatal=False, **kargs)
@@ -535,7 +537,7 @@ class InfoExtractor(object):

    def _sort_formats(self, formats):
        if not formats:
-            raise ExtractorError(u'No video formats found')
+            raise ExtractorError('No video formats found')

        def _formats_key(f):
            # TODO remove the following workaround
@@ -555,9 +557,9 @@ class InfoExtractor(object):

            if f.get('vcodec') == 'none':  # audio only
                if self._downloader.params.get('prefer_free_formats'):
-                    ORDER = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus']
+                    ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
                else:
-                    ORDER = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a']
+                    ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
                ext_preference = 0
                try:
                    audio_ext_preference = ORDER.index(f['ext'])
@@ -565,9 +567,9 @@ class InfoExtractor(object):
                    audio_ext_preference = -1
            else:
                if self._downloader.params.get('prefer_free_formats'):
-                    ORDER = [u'flv', u'mp4', u'webm']
+                    ORDER = ['flv', 'mp4', 'webm']
                else:
-                    ORDER = [u'webm', u'flv', u'mp4']
+                    ORDER = ['webm', 'flv', 'mp4']
                try:
                    ext_preference = ORDER.index(f['ext'])
                except ValueError:
@@ -609,7 +611,7 @@ class InfoExtractor(object):

    def _sleep(self, timeout, video_id, msg_template=None):
        if msg_template is None:
-            msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds'
+            msg_template = '%(video_id)s: Waiting for %(timeout)s seconds'
        msg = msg_template % {'video_id': video_id, 'timeout': timeout}
        self.to_screen(msg)
        time.sleep(timeout)
@@ -620,11 +622,15 @@ class InfoExtractor(object):
            'Unable to download f4m manifest')

        formats = []
-        for media_el in manifest.findall('{http://ns.adobe.com/f4m/1.0}media'):
+        media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
+        for i, media_el in enumerate(media_nodes):
+            tbr = int_or_none(media_el.attrib.get('bitrate'))
+            format_id = 'f4m-%d' % (i if tbr is None else tbr)
            formats.append({
+                'format_id': format_id,
                'url': manifest_url,
                'ext': 'flv',
-                'tbr': int_or_none(media_el.attrib.get('bitrate')),
+                'tbr': tbr,
                'width': int_or_none(media_el.attrib.get('width')),
                'height': int_or_none(media_el.attrib.get('height')),
            })
@@ -632,6 +638,58 @@ class InfoExtractor(object):

        return formats

+    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None):
+        """Download the m3u8 playlist at m3u8_url and return its formats.
+
+        The first entry ('m3u8-meta') points at the playlist itself. Every
+        non-blank, non-comment line of the playlist adds one more entry,
+        annotated with the attributes of the preceding #EXT-X-STREAM-INF
+        line, if there was one. The list is passed through
+        self._sort_formats() before being returned.
+        """
+        formats = [{
+            'format_id': 'm3u8-meta',
+            'url': m3u8_url,
+            'ext': ext,
+            'protocol': 'm3u8',
+            'preference': -1,
+            'resolution': 'multiple',
+            'format_note': 'Quality selection URL',
+        }]
+
+        m3u8_doc = self._download_webpage(m3u8_url, video_id)
+        last_info = None
+        # KEY=value pairs; the value is either double-quoted or runs up to
+        # the next comma.
+        kv_rex = re.compile(
+            r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
+        for line in m3u8_doc.splitlines():
+            if line.startswith('#EXT-X-STREAM-INF:'):
+                last_info = {}
+                for m in kv_rex.finditer(line):
+                    v = m.group('val')
+                    if v.startswith('"'):
+                        v = v[1:-1]
+                    last_info[m.group('key')] = v
+            elif line.startswith('#') or not line.strip():
+                continue
+            else:
+                if last_info is None:
+                    # URI seen before any stream info line: nothing to
+                    # annotate it with.
+                    formats.append({'url': line.strip()})
+                    continue
+                tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
+
+                f = {
+                    'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
+                    'url': line.strip(),
+                    'tbr': tbr,
+                    'ext': ext,
+                }
+                codecs = last_info.get('CODECS')
+                if codecs:
+                    # CODECS may list a single codec (e.g. audio-only
+                    # variants) or more than two, so do not unpack it as
+                    # exactly "video,audio".
+                    va_codecs = [c.strip() for c in codecs.split(',')]
+                    if va_codecs[0]:
+                        f['vcodec'] = va_codecs[0].partition('.')[0]
+                    if len(va_codecs) > 1 and va_codecs[1]:
+                        f['acodec'] = va_codecs[1].partition('.')[0]
+                resolution = last_info.get('RESOLUTION')
+                if resolution:
+                    width_str, height_str = resolution.split('x')
+                    f['width'] = int(width_str)
+                    f['height'] = int(height_str)
+                formats.append(f)
+                last_info = {}
+        self._sort_formats(formats)
+        return formats
+

 class SearchInfoExtractor(InfoExtractor):
    """
@@ -651,7 +709,7 @@ class SearchInfoExtractor(InfoExtractor):
    def _real_extract(self, query):
        mobj = re.match(self._make_valid_url(), query)
        if mobj is None:
-            raise ExtractorError(u'Invalid search query "%s"' % query)
+            raise ExtractorError('Invalid search query "%s"' % query)

        prefix = mobj.group('prefix')
        query = mobj.group('query')
@@ -662,9 +720,9 @@ class SearchInfoExtractor(InfoExtractor):
        else:
            n = int(prefix)
            if n <= 0:
-                raise ExtractorError(u'invalid download number %s for query "%s"' % (n, query))
+                raise ExtractorError('invalid download number %s for query "%s"' % (n, query))
            elif n > self._MAX_RESULTS:
-                self._downloader.report_warning(u'%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
+                self._downloader.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
                n = self._MAX_RESULTS
            return self._get_n_results(query, n)

@@ -34,6 +34,13 @@ class CSpanIE(InfoExtractor):
            'title': 'International Health Care Models',
            'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
        }
+    }, {
+        'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
+        'info_dict': {
+            'id': '342759',
+            'title': 'General Motors Ignition Switch Recall',
+        },
+        'playlist_duration_sum': 14855,
    }]

    def _real_extract(self, url):
@@ -1,3 +1,6 @@
+#coding: utf-8
+from __future__ import unicode_literals
+
 import re
 import json
 import itertools
@@ -28,51 +31,53 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
    """Information Extractor for Dailymotion"""

    _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
-    IE_NAME = u'dailymotion'
+    IE_NAME = 'dailymotion'

    _FORMATS = [
-        (u'stream_h264_ld_url', u'ld'),
-        (u'stream_h264_url', u'standard'),
-        (u'stream_h264_hq_url', u'hq'),
-        (u'stream_h264_hd_url', u'hd'),
-        (u'stream_h264_hd1080_url', u'hd180'),
+        ('stream_h264_ld_url', 'ld'),
+        ('stream_h264_url', 'standard'),
+        ('stream_h264_hq_url', 'hq'),
+        ('stream_h264_hd_url', 'hd'),
+        ('stream_h264_hd1080_url', 'hd180'),
    ]

    _TESTS = [
        {
-            u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
-            u'file': u'x33vw9.mp4',
-            u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
-            u'info_dict': {
-                u"uploader": u"Amphora Alex and Van .", 
-                u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
+            'url': 'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
+            'md5': '392c4b85a60a90dc4792da41ce3144eb',
+            'info_dict': {
+                'id': 'x33vw9',
+                'ext': 'mp4',
+                'uploader': 'Amphora Alex and Van .',
+                'title': 'Tutoriel de Youtubeur"DL DES VIDEO DE YOUTUBE"',
            }
        },
        # Vevo video
        {
-            u'url': u'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
-            u'file': u'USUV71301934.mp4',
-            u'info_dict': {
-                u'title': u'Roar (Official)',
-                u'uploader': u'Katy Perry',
-                u'upload_date': u'20130905',
+            'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
+            'info_dict': {
+                'title': 'Roar (Official)',
+                'id': 'USUV71301934',
+                'ext': 'mp4',
+                'uploader': 'Katy Perry',
+                'upload_date': '20130905',
            },
-            u'params': {
-                u'skip_download': True,
+            'params': {
+                'skip_download': True,
            },
-            u'skip': u'VEVO is only available in some countries',
+            'skip': 'VEVO is only available in some countries',
        },
        # age-restricted video
        {
-            u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
-            u'file': u'xyh2zz.mp4',
-            u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
-            u'info_dict': {
-                u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
-                u'uploader': 'HotWaves1012',
-                u'age_limit': 18,
+            'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
+            'md5': '0d667a7b9cebecc3c89ee93099c4159d',
+            'info_dict': {
+                'id': 'xyh2zz',
+                'ext': 'mp4',
+                'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
+                'uploader': 'HotWaves1012',
+                'age_limit': 18,
            }
-
        }
    ]

@@ -97,8 +102,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
            webpage)
        if m_vevo is not None:
            vevo_id = m_vevo.group('id')
-            self.to_screen(u'Vevo video detected: %s' % vevo_id)
-            return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo')
+            self.to_screen('Vevo video detected: %s' % vevo_id)
+            return self.url_result('vevo:%s' % vevo_id, ie='Vevo')

        age_limit = self._rta_search(webpage)

@@ -109,7 +114,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):

        embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
        embed_page = self._download_webpage(embed_url, video_id,
-                                            u'Downloading embed page')
+                                            'Downloading embed page')
        info = self._search_regex(r'var info = ({.*?}),$', embed_page,
            'video info', flags=re.MULTILINE)
        info = json.loads(info)
@@ -134,7 +139,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
                    'height': height,
                })
        if not formats:
-            raise ExtractorError(u'Unable to extract video URL')
+            raise ExtractorError('Unable to extract video URL')

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, webpage)
@@ -143,7 +148,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
            return

        view_count = self._search_regex(
-            r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, u'view count', fatal=False)
+            r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, 'view count', fatal=False)
        if view_count is not None:
            view_count = str_to_int(view_count)

@@ -165,28 +170,35 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
                'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
                video_id, note=False)
        except ExtractorError as err:
-            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
+            self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
            return {}
        info = json.loads(sub_list)
        if (info['total'] > 0):
            sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
            return sub_lang_list
-        self._downloader.report_warning(u'video doesn\'t have subtitles')
+        self._downloader.report_warning('video doesn\'t have subtitles')
        return {}


 class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
-    IE_NAME = u'dailymotion:playlist'
+    IE_NAME = 'dailymotion:playlist'
    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
    _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
    _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
+    _TESTS = [{
+        'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
+        'info_dict': {
+            'title': 'SPORT',
+        },
+        'playlist_mincount': 20,
+    }]

    def _extract_entries(self, id):
        video_ids = []
        for pagenum in itertools.count(1):
            request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum))
            webpage = self._download_webpage(request,
-                                             id, u'Downloading page %s' % pagenum)
+                                             id, 'Downloading page %s' % pagenum)

            video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))

@@ -209,9 +221,17 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):


 class DailymotionUserIE(DailymotionPlaylistIE):
-    IE_NAME = u'dailymotion:user'
+    IE_NAME = 'dailymotion:user'
    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
    _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
+    _TESTS = [{
+        'url': 'https://www.dailymotion.com/user/nqtv',
+        'info_dict': {
+            'id': 'nqtv',
+            'title': 'Rémi Gaillard',
+        },
+        'playlist_mincount': 100,
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -219,7 +239,7 @@ class DailymotionUserIE(DailymotionPlaylistIE):
        webpage = self._download_webpage(url, user)
        full_user = unescapeHTML(self._html_search_regex(
            r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
-            webpage, u'user', flags=re.DOTALL))
+            webpage, 'user'))

        return {
            '_type': 'playlist',
@@ -11,8 +11,7 @@ from ..utils import compat_urllib_parse_unquote
 class DropboxIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)'
    _TEST = {
-        'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4',
-        'md5': '8a3d905427a6951ccb9eb292f154530b',
+        'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
        'info_dict': {
            'id': 'nelirfsxnmcfbfh',
            'ext': 'mp4',
@@ -25,7 +24,9 @@ class DropboxIE(InfoExtractor):
        video_id = mobj.group('id')
        fn = compat_urllib_parse_unquote(mobj.group('title'))
        title = os.path.splitext(fn)[0]
-        video_url = url + '?dl=1'
+        video_url = re.sub(r'[?&]dl=0', '', url)
+        # Pick the separator from the stripped URL, not the original one
+        video_url += ('&' if '?' in video_url else '?') + 'dl=1'

        return {
            'id': video_id,
@@ -1,10 +1,13 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
 import json
 import random
 import re

 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
+    compat_str,
 )


@@ -12,86 +15,98 @@ class EightTracksIE(InfoExtractor):
    IE_NAME = '8tracks'
    _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
    _TEST = {
-        u"name": u"EightTracks",
-        u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
-        u"playlist": [
+        "name": "EightTracks",
+        "url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
+        "info_dict": {
+            'id': '1336550',
+            'display_id': 'youtube-dl-test-tracks-a',
+            "description": "test chars:  \"'/\\ä↭",
+            "title": "youtube-dl test tracks \"'/\\ä↭<>",
+        },
+        "playlist": [
            {
-                u"file": u"11885610.m4a",
-                u"md5": u"96ce57f24389fc8734ce47f4c1abcc55",
-                u"info_dict": {
-                    u"title": u"youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
-                    u"uploader_id": u"ytdl"
+                "md5": "96ce57f24389fc8734ce47f4c1abcc55",
+                "info_dict": {
+                    "id": "11885610",
+                    "ext": "m4a",
+                    "title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
+                    "uploader_id": "ytdl"
                }
            },
            {
-                u"file": u"11885608.m4a",
-                u"md5": u"4ab26f05c1f7291ea460a3920be8021f",
-                u"info_dict": {
-                    u"title": u"youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
-                    u"uploader_id": u"ytdl"
+                "md5": "4ab26f05c1f7291ea460a3920be8021f",
+                "info_dict": {
+                    "id": "11885608",
+                    "ext": "m4a",
+                    "title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
+                    "uploader_id": "ytdl"
                }
            },
            {
-                u"file": u"11885679.m4a",
-                u"md5": u"d30b5b5f74217410f4689605c35d1fd7",
-                u"info_dict": {
-                    u"title": u"youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
-                    u"uploader_id": u"ytdl"
+                "md5": "d30b5b5f74217410f4689605c35d1fd7",
+                "info_dict": {
+                    "id": "11885679",
+                    "ext": "m4a",
+                    "title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
+                    "uploader_id": "ytdl"
                }
            },
            {
-                u"file": u"11885680.m4a",
-                u"md5": u"4eb0a669317cd725f6bbd336a29f923a",
-                u"info_dict": {
-                    u"title": u"youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
-                    u"uploader_id": u"ytdl"
+                "md5": "4eb0a669317cd725f6bbd336a29f923a",
+                "info_dict": {
+                    "id": "11885680",
+                    "ext": "m4a",
+                    "title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
+                    "uploader_id": "ytdl"
                }
            },
            {
-                u"file": u"11885682.m4a",
-                u"md5": u"1893e872e263a2705558d1d319ad19e8",
-                u"info_dict": {
-                    u"title": u"PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
-                    u"uploader_id": u"ytdl"
+                "md5": "1893e872e263a2705558d1d319ad19e8",
+                "info_dict": {
+                    "id": "11885682",
+                    "ext": "m4a",
+                    "title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
+                    "uploader_id": "ytdl"
                }
            },
            {
-                u"file": u"11885683.m4a",
-                u"md5": u"b673c46f47a216ab1741ae8836af5899",
-                u"info_dict": {
-                    u"title": u"PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
-                    u"uploader_id": u"ytdl"
+                "md5": "b673c46f47a216ab1741ae8836af5899",
+                "info_dict": {
+                    "id": "11885683",
+                    "ext": "m4a",
+                    "title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
+                    "uploader_id": "ytdl"
                }
            },
            {
-                u"file": u"11885684.m4a",
-                u"md5": u"1d74534e95df54986da7f5abf7d842b7",
-                u"info_dict": {
-                    u"title": u"phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
-                    u"uploader_id": u"ytdl"
+                "md5": "1d74534e95df54986da7f5abf7d842b7",
+                "info_dict": {
+                    "id": "11885684",
+                    "ext": "m4a",
+                    "title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
+                    "uploader_id": "ytdl"
                }
            },
            {
-                u"file": u"11885685.m4a",
-                u"md5": u"f081f47af8f6ae782ed131d38b9cd1c0",
-                u"info_dict": {
-                    u"title": u"phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
-                    u"uploader_id": u"ytdl"
+                "md5": "f081f47af8f6ae782ed131d38b9cd1c0",
+                "info_dict": {
+                    "id": "11885685",
+                    "ext": "m4a",
+                    "title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
+                    "uploader_id": "ytdl"
                }
            }
        ]
    }

-
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
        playlist_id = mobj.group('id')

        webpage = self._download_webpage(url, playlist_id)

-        json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL)
+        json_like = self._search_regex(
+            r"(?s)PAGE.mix = (.*?);\n", webpage, 'trax information')
        data = json.loads(json_like)

        session = str(random.randint(0, 1000000000))
@@ -99,21 +114,30 @@ class EightTracksIE(InfoExtractor):
        track_count = data['tracks_count']
        first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
        next_url = first_url
-        res = []
+        entries = []
        for i in range(track_count):
-            api_json = self._download_webpage(next_url, playlist_id,
-                note=u'Downloading song information %s/%s' % (str(i+1), track_count),
-                errnote=u'Failed to download song information')
+            api_json = self._download_webpage(
+                next_url, playlist_id,
+                note='Downloading song information %d/%d' % (i + 1, track_count),
+                errnote='Failed to download song information')
            api_data = json.loads(api_json)
-            track_data = api_data[u'set']['track']
+            track_data = api_data['set']['track']
            info = {
-                'id': track_data['id'],
+                'id': compat_str(track_data['id']),
                'url': track_data['track_file_stream_url'],
                'title': track_data['performer'] + u' - ' + track_data['name'],
                'raw_title': track_data['name'],
                'uploader_id': data['user']['login'],
                'ext': 'm4a',
            }
-            res.append(info)
-            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
-        return res
+            entries.append(info)
+            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (
+                session, mix_id, track_data['id'])
+        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'id': compat_str(mix_id),
+            'display_id': playlist_id,
+            'title': data.get('name'),
+            'description': data.get('description'),
+        }
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..utils import fix_xml_ampersands


 class EmpflixIE(InfoExtractor):
@@ -36,7 +37,8 @@ class EmpflixIE(InfoExtractor):
            webpage, 'flashvars.config')

        cfg_xml = self._download_xml(
-            cfg_url, video_id, note='Downloading metadata')
+            cfg_url, video_id, note='Downloading metadata',
+            transform_source=fix_xml_ampersands)

        formats = [
            {
@@ -44,11 +46,13 @@ class EmpflixIE(InfoExtractor):
                'format_id': item.find('res').text,
            } for item in cfg_xml.findall('./quality/item')
        ]
+        thumbnail = cfg_xml.findtext('./startThumb') or None  # tag may be absent

        return {
            'id': video_id,
            'title': video_title,
            'description': video_description,
+            'thumbnail': thumbnail,
            'formats': formats,
            'age_limit': age_limit,
        }
@@ -12,10 +12,11 @@ from ..utils import (
 class EveryonesMixtapeIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$'

-    _TEST = {
+    _TESTS = [{
        'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5',
-        'file': '5bfseWNmlds.mp4',
        "info_dict": {
+            'id': '5bfseWNmlds',
+            'ext': 'mp4',
            "title": "Passion Pit - \"Sleepyhead\" (Official Music Video)",
            "uploader": "FKR.TV",
            "uploader_id": "frenchkissrecords",
@@ -25,7 +26,14 @@ class EveryonesMixtapeIE(InfoExtractor):
        'params': {
            'skip_download': True,  # This is simply YouTube
        }
-    }
+    }, {
+        'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi',
+        'info_dict': {
+            'id': 'm7m0jJAbMQi',
+            'title': 'Driving',
+        },
+        'playlist_count': 24
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -21,14 +21,14 @@ class FacebookIE(InfoExtractor):
    _VALID_URL = r'''(?x)
        https?://(?:\w+\.)?facebook\.com/
        (?:[^#]*?\#!/)?
-        (?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
+        (?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?)
        (?:v|video_id)=(?P<id>[0-9]+)
        (?:.*)'''
    _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
    _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
    _NETRC_MACHINE = 'facebook'
    IE_NAME = 'facebook'
-    _TEST = {
+    _TESTS = [{
        'url': 'https://www.facebook.com/photo.php?v=120708114770723',
        'md5': '48975a41ccc4b7a581abd68651c1a5a8',
        'info_dict': {
@@ -37,7 +37,10 @@ class FacebookIE(InfoExtractor):
            'duration': 279,
            'title': 'PEOPLE ARE AWESOME 2013',
        }
-    }
+    }, {
+        'url': 'https://www.facebook.com/video.php?v=10204634152394104',
+        'only_matching': True,
+    }]

    def _login(self):
        (useremail, password) = self._get_login_info()
@@ -12,6 +12,7 @@ from ..utils import (
    compat_urlparse,
    compat_xml_parse_error,

+    determine_ext,
    ExtractorError,
    float_or_none,
    HEADRequest,
@@ -341,6 +342,30 @@ class GenericIE(InfoExtractor):
                'uploader': 'www.handjobhub.com',
                'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub',
            }
+        },
+        # RSS feed
+        {
+            'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
+            'info_dict': {
+                'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
+                'title': 'Zero Punctuation',
+                'description': 're:'
+            },
+            'playlist_mincount': 11,
+        },
+        # Multiple brightcove videos
+        # https://github.com/rg3/youtube-dl/issues/2283
+        {
+            'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
+            'info_dict': {
+                'id': 'always-never',
+                'title': 'Always / Never - The New Yorker',
+            },
+            'playlist_count': 3,
+            'params': {
+                'extract_flat': False,
+                'skip_download': True,
+            }
        }
    ]

@@ -809,7 +834,6 @@ class GenericIE(InfoExtractor):
                    \s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
                        ["']?url["']?\s*:\s*["']([^"']+)["']
            ''', webpage)
-            assert found
        if not found:
            # Try to find twitter cards info
            found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
@@ -821,13 +845,14 @@ class GenericIE(InfoExtractor):
            if m_video_type is not None:
                def check_video(vurl):
                    vpath = compat_urlparse.urlparse(vurl).path
-                    return '.' in vpath and not vpath.endswith('.swf')
+                    vext = determine_ext(vpath)
+                    return '.' in vpath and vext not in ('swf', 'png', 'jpg')
                found = list(filter(
                    check_video,
                    re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)))
        if not found:
            # HTML5 video
-            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage)
+            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]+)? src="([^"]+)"', webpage)
        if not found:
            found = re.search(
                r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
@@ -14,6 +14,14 @@ class GoogleSearchIE(SearchInfoExtractor):
    _MAX_RESULTS = 1000
    IE_NAME = 'video.google:search'
    _SEARCH_KEY = 'gvsearch'
+    _TEST = {
+        'url': 'gvsearch15:python language',
+        'info_dict': {
+            'id': 'python language',
+            'title': 'python language',
+        },
+        'playlist_count': 15,
+    }

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
@@ -18,6 +18,7 @@ class IGNIE(InfoExtractor):
    _DESCRIPTION_RE = [
        r'<span class="page-object-description">(.+?)</span>',
        r'id="my_show_video">.*?<p>(.*?)</p>',
+        r'<meta name="description" content="(.*?)"',
    ]

    _TESTS = [
@@ -55,6 +56,17 @@ class IGNIE(InfoExtractor):
                'skip_download': True,
            },
        },
+        {
+            'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
+            'md5': '4e9a0bda1e5eebd31ddcf86ec0b9b3c7',
+            'info_dict': {
+                'id': '078fdd005f6d3c02f63d795faa1b984f',
+                'ext': 'mp4',
+                'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
+                'description': 'Giant skeletons, bloody hunts, and captivating'
+                    ' natural beauty take our breath away.',
+            },
+        },
    ]

    def _find_video_id(self, webpage):
@@ -62,6 +74,7 @@ class IGNIE(InfoExtractor):
            r'data-video-id="(.+?)"',
            r'<object id="vid_(.+?)"',
            r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
+            r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
        ]
        return self._search_regex(res_id, webpage, 'video id')

@@ -70,10 +83,7 @@ class IGNIE(InfoExtractor):
        name_or_id = mobj.group('name_or_id')
        page_type = mobj.group('type')
        webpage = self._download_webpage(url, name_or_id)
-        if page_type == 'articles':
-            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, 'video url')
-            return self.url_result(video_url, ie='IGN')
-        elif page_type != 'video':
+        if page_type != 'video':
            multiple_urls = re.findall(
                '<param name="flashvars" value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
                webpage)
@@ -63,6 +63,14 @@ class ImdbListIE(InfoExtractor):
    IE_NAME = 'imdb:list'
    IE_DESC = 'Internet Movie Database lists'
    _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
+    _TEST = {
+        'url': 'http://www.imdb.com/list/JFs9NWw6XI0',
+        'info_dict': {
+            'id': 'JFs9NWw6XI0',
+            'title': 'March 23, 2012 Releases',
+        },
+        'playlist_count': 7,
+    }
    
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -46,6 +46,30 @@ class InstagramUserIE(InfoExtractor):
    _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
    IE_DESC = 'Instagram user profile'
    IE_NAME = 'instagram:user'
+    _TEST = {
+        'url': 'http://instagram.com/porsche',
+        'info_dict': {
+            'id': 'porsche',
+            'title': 'porsche',
+        },
+        'playlist_mincount': 2,
+        'playlist': [{
+            'info_dict': {
+                'id': '614605558512799803_462752227',
+                'ext': 'mp4',
+                'title': '#Porsche Intelligent Performance.',
+                'thumbnail': r're:^https?://.*\.jpg',
+                'uploader': 'Porsche',
+                'uploader_id': 'porsche',
+                'timestamp': 1387486713,
+                'upload_date': '20131219',
+            },
+        }],
+        'params': {
+            'extract_flat': True,
+            'skip_download': True,
+        }
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -127,6 +127,21 @@ class IviCompilationIE(InfoExtractor):
    IE_DESC = 'ivi.ru compilations'
    IE_NAME = 'ivi:compilation'
    _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
+    _TESTS = [{
+        'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
+        'info_dict': {
+            'id': 'dvoe_iz_lartsa',
+            'title': 'Двое из ларца (2006 - 2008)',
+        },
+        'playlist_mincount': 24,
+    }, {
+        'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
+        'info_dict': {
+            'id': 'dvoe_iz_lartsa/season1',
+            'title': 'Двое из ларца (2006 - 2008) 1 сезон',
+        },
+        'playlist_mincount': 12,
+    }]

    def _extract_entries(self, html, compilation_id):
        return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
@@ -12,18 +12,27 @@ class KhanAcademyIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
    IE_NAME = 'KhanAcademy'

-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.khanacademy.org/video/one-time-pad',
-        'file': 'one-time-pad.mp4',
        'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
        'info_dict': {
+            'id': 'one-time-pad',
+            'ext': 'mp4',
            'title': 'The one-time pad',
            'description': 'The perfect cipher',
            'duration': 176,
            'uploader': 'Brit Cruise',
            'upload_date': '20120411',
        }
-    }
+    }, {
+        'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
+        'info_dict': {
+            'id': 'cryptography',
+            'title': 'Journey into cryptography',
+            'description': 'How have humans protected their secret messages through history? What has changed today?',
+        },
+        'playlist_mincount': 3,
+    }]

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
@@ -19,7 +19,7 @@ from ..utils import (
 class LivestreamIE(InfoExtractor):
    IE_NAME = 'livestream'
    _VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
        'md5': '53274c76ba7754fb0e8d072716f2292b',
        'info_dict': {
@@ -31,7 +31,13 @@ class LivestreamIE(InfoExtractor):
            'view_count': int,
            'thumbnail': 're:^http://.*\.jpg$'
        }
-    }
+    }, {
+        'url': 'http://new.livestream.com/tedx/cityenglish',
+        'info_dict': {
+            'title': 'TEDCity2.0 (English)',
+        },
+        'playlist_mincount': 4,
+    }]

    def _parse_smil(self, video_id, smil_url):
        formats = []
@@ -111,34 +117,37 @@ class LivestreamIE(InfoExtractor):
        event_name = mobj.group('event_name')
        webpage = self._download_webpage(url, video_id or event_name)

-        og_video = self._og_search_video_url(webpage, 'player url', fatal=False, default=None)
-        if og_video is None:
-            config_json = self._search_regex(
-                r'window.config = ({.*?});', webpage, 'window config')
-            info = json.loads(config_json)['event']
-
-            def is_relevant(vdata, vid):
-                result = vdata['type'] == 'video'
-                if video_id is not None:
-                    result = result and compat_str(vdata['data']['id']) == vid
-                return result
-
-            videos = [self._extract_video_info(video_data['data'])
-                      for video_data in info['feed']['data']
-                      if is_relevant(video_data, video_id)]
-            if video_id is None:
-                # This is an event page:
-                return self.playlist_result(videos, info['id'], info['full_name'])
-            else:
-                if videos:
-                    return videos[0]
-        else:
+        og_video = self._og_search_video_url(
+            webpage, 'player url', fatal=False, default=None)
+        if og_video is not None:
            query_str = compat_urllib_parse_urlparse(og_video).query
            query = compat_urlparse.parse_qs(query_str)
-            api_url = query['play_url'][0].replace('.smil', '')
-            info = json.loads(self._download_webpage(
-                api_url, video_id, 'Downloading video info'))
-            return self._extract_video_info(info)
+            if 'play_url' in query:
+                api_url = query['play_url'][0].replace('.smil', '')
+                info = json.loads(self._download_webpage(
+                    api_url, video_id, 'Downloading video info'))
+                return self._extract_video_info(info)
+
+        config_json = self._search_regex(
+            r'window.config = ({.*?});', webpage, 'window config')
+        info = json.loads(config_json)['event']
+
+        def is_relevant(vdata, vid):
+            result = vdata['type'] == 'video'
+            if video_id is not None:
+                result = result and compat_str(vdata['data']['id']) == vid
+            return result
+
+        videos = [self._extract_video_info(video_data['data'])
+                  for video_data in info['feed']['data']
+                  if is_relevant(video_data, video_id)]
+        if video_id is None:
+            # This is an event page:
+            return self.playlist_result(videos, info['id'], info['full_name'])
+        else:
+            if not videos:
+                raise ExtractorError('Cannot find video %s' % video_id)
+            return videos[0]


 # The original version of Livestream uses a different system
@@ -148,7 +157,7 @@ class LivestreamOriginalIE(InfoExtractor):
        (?P<user>[^/]+)/(?P<type>video|folder)
        (?:\?.*?Id=|/)(?P<id>.*?)(&|$)
        '''
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
        'info_dict': {
            'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
@@ -159,7 +168,13 @@ class LivestreamOriginalIE(InfoExtractor):
            # rtmp
            'skip_download': True,
        },
-    }
+    }, {
+        'url': 'https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
+        'info_dict': {
+            'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
+        },
+        'playlist_mincount': 4,
+    }]

    def _extract_video(self, user, video_id):
        api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
@@ -182,15 +197,19 @@ class LivestreamOriginalIE(InfoExtractor):

    def _extract_folder(self, url, folder_id):
        webpage = self._download_webpage(url, folder_id)
-        urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage))
+        paths = orderedSet(re.findall(
+            r'''(?x)(?:
+                <li\s+class="folder">\s*<a\s+href="|
+                <a\s+href="(?=https?://livestre\.am/)
+            )([^"]+)"''', webpage))

        return {
            '_type': 'playlist',
            'id': folder_id,
            'entries': [{
                '_type': 'url',
-                'url': video_url,
-            } for video_url in urls],
+                'url': compat_urlparse.urljoin(url, p),
+            } for p in paths],
        }

    def _real_extract(self, url):
@@ -6,6 +6,7 @@ from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
    ExtractorError,
+    HEADRequest,
    int_or_none,
    parse_iso8601,
 )
@@ -38,7 +39,7 @@ class MixcloudIE(InfoExtractor):
            try:
                # We only want to know if the request succeed
                # don't download the whole file
-                self._request_webpage(url, None, False)
+                self._request_webpage(HEADRequest(url), None, False)
                return url
            except ExtractorError:
                url = None
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import os
 import re

@@ -8,15 +10,17 @@ from ..utils import (
    compat_urllib_parse,
 )

+
 class MofosexIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
+    _VALID_URL = r'^https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
    _TEST = {
-        u'url': u'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
-        u'file': u'5018.mp4',
-        u'md5': u'1b2eb47ac33cc75d4a80e3026b613c5a',
-        u'info_dict': {
-            u"title": u"Japanese Teen Music Video",
-            u"age_limit": 18,
+        'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
+        'md5': '1b2eb47ac33cc75d4a80e3026b613c5a',
+        'info_dict': {
+            'id': '5018',
+            'ext': 'mp4',
+            'title': 'Japanese Teen Music Video',
+            'age_limit': 18,
        }
    }

@@ -29,8 +33,8 @@ class MofosexIE(InfoExtractor):
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

-        video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, u'title')
-        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, u'video_url'))
+        video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, 'title')
+        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
        path = compat_urllib_parse_urlparse(video_url).path
        extension = os.path.splitext(path)[1][1:]
        format = path.split('/')[5].split('_')[:2]
@@ -0,0 +1,90 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    unified_strdate,
+)
+
+
+class MusicVaultIE(InfoExtractor):
+    """Extractor for single video pages on musicvault.com.
+
+    The uploader slug, a display id and the numeric video id are taken from
+    the page URL; the remaining metadata is scraped from the page markup and
+    the media URL is assembled from the Kaltura ids embedded in the page.
+    """
+
+    _VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html'
+    _TEST = {
+        'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html',
+        'md5': '2cdbb3ae75f7fb3519821507d2fb3c15',
+        'info_dict': {
+            'id': '1010863',
+            'ext': 'mp4',
+            'uploader_id': 'the-allman-brothers-band',
+            'title': 'Straight from the Heart',
+            'duration': 244,
+            'uploader': 'The Allman Brothers Band',
+            'thumbnail': 're:^https?://.*/thumbnail/.*',
+            'upload_date': '19811216',
+            'location': 'Capitol Theatre (Passaic, NJ)',
+            'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Download the video page for *url* and return its info dict."""
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('display_id')
+        webpage = self._download_webpage(url, display_id)
+
+        thumbnail = self._search_regex(
+            r'<meta itemprop="thumbnail" content="([^"]+)"',
+            webpage, 'thumbnail', fatal=False)
+
+        # The headings below are searched only inside the "data" <div>.
+        data_div = self._search_regex(
+            r'(?s)<div class="data">(.*?)</div>', webpage, 'data fields')
+        uploader = self._html_search_regex(
+            r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False)
+        title = self._html_search_regex(
+            r'<h2.*?>(.*?)</h2>', data_div, 'title')
+        # The date is optional (fatal=False), so only normalise it when the
+        # heading was actually found instead of handing None to the parser.
+        upload_date_str = self._html_search_regex(
+            r'<h3.*?>(.*?)</h3>', data_div, 'upload date', fatal=False)
+        upload_date = (
+            unified_strdate(upload_date_str) if upload_date_str else None)
+        location = self._html_search_regex(
+            r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False)
+
+        duration = parse_duration(self._html_search_meta('duration', webpage))
+
+        # The media URL is built from three ids found in the page markup.
+        VIDEO_URL_TEMPLATE = 'http://cdnapi.kaltura.com/p/%(uid)s/sp/%(wid)s/playManifest/entryId/%(entry_id)s/format/url/protocol/http'
+        kaltura_id = self._search_regex(
+            r'<div id="video-detail-player" data-kaltura-id="([^"]+)"',
+            webpage, 'kaltura ID')
+        video_url = VIDEO_URL_TEMPLATE % {
+            'entry_id': kaltura_id,
+            'wid': self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid'),
+            'uid': self._search_regex(r'uiconf_id/([0-9]+)/', webpage, 'uid'),
+        }
+
+        return {
+            'id': mobj.group('id'),
+            'url': video_url,
+            'ext': 'mp4',
+            'display_id': display_id,
+            'uploader_id': mobj.group('uploader_id'),
+            'thumbnail': thumbnail,
+            'description': self._html_search_meta('description', webpage),
+            'upload_date': upload_date,
+            'location': location,
+            'title': title,
+            'uploader': uploader,
+            'duration': duration,
+        }
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import json

@@ -25,8 +27,8 @@ class NHLBaseInfoExtractor(InfoExtractor):
            'path': initial_video_url.replace('.mp4', '_sd.mp4'),
        })
        path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
-        path_doc = self._download_xml(path_url, video_id,
-            u'Downloading final video url')
+        path_doc = self._download_xml(
+            path_url, video_id, 'Downloading final video url')
        video_url = path_doc.find('path').text

        join = compat_urlparse.urljoin
@@ -43,17 +45,18 @@ class NHLBaseInfoExtractor(InfoExtractor):


 class NHLIE(NHLBaseInfoExtractor):
-    IE_NAME = u'nhl.com'
-    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P<id>\d+)'
+    IE_NAME = 'nhl.com'
+    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?:[?&])id=(?P<id>[0-9]+)'

    _TEST = {
-        u'url': u'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
-        u'file': u'453614.mp4',
-        u'info_dict': {
-            u'title': u'Quick clip: Weise 4-3 goal vs Flames',
-            u'description': u'Dale Weise scores his first of the season to put the Canucks up 4-3.',
-            u'duration': 18,
-            u'upload_date': u'20131006',
+        'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
+        'info_dict': {
+            'id': '453614',
+            'ext': 'mp4',
+            'title': 'Quick clip: Weise 4-3 goal vs Flames',
+            'description': 'Dale Weise scores his first of the season to put the Canucks up 4-3.',
+            'duration': 18,
+            'upload_date': '20131006',
        },
    }

@@ -61,23 +64,23 @@ class NHLIE(NHLBaseInfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
-        info_json = self._download_webpage(json_url, video_id,
-            u'Downloading info json')
-        info_json = self._fix_json(info_json)
-        info = json.loads(info_json)[0]
-        return self._extract_video(info)
+        data = self._download_json(
+            json_url, video_id, transform_source=self._fix_json)
+        return self._extract_video(data[0])


 class NHLVideocenterIE(NHLBaseInfoExtractor):
-    IE_NAME = u'nhl.com:videocenter'
-    IE_DESC = u'NHL videocenter category'
-    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
-
-    @classmethod
-    def suitable(cls, url):
-        if NHLIE.suitable(url):
-            return False
-        return super(NHLVideocenterIE, cls).suitable(url)
+    IE_NAME = 'nhl.com:videocenter'
+    IE_DESC = 'NHL videocenter category'
+    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
+    _TEST = {
+        'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
+        'info_dict': {
+            'id': '999',
+            'title': 'Highlights',
+        },
+        'playlist_count': 12,
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -86,10 +89,10 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
        cat_id = self._search_regex(
            [r'var defaultCatId = "(.+?)";',
             r'{statusIndex:0,index:0,.*?id:(.*?),'],
-            webpage, u'category id')
+            webpage, 'category id')
        playlist_title = self._html_search_regex(
            r'tab0"[^>]*?>(.*?)</td>',
-            webpage, u'playlist title', flags=re.DOTALL).lower().capitalize()
+            webpage, 'playlist title', flags=re.DOTALL).lower().capitalize()

        data = compat_urllib_parse.urlencode({
            'cid': cat_id,
@@ -104,7 +107,7 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
        response = self._fix_json(response)
        if not response.strip():
            self._downloader.report_warning(u'Got an empty reponse, trying '
-                                            u'adding the "newvideos" parameter')
+                                            'adding the "newvideos" parameter')
            response = self._download_webpage(request_url + '&newvideos=true',
                playlist_title)
            response = self._fix_json(response)
@@ -114,5 +117,5 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
            '_type': 'playlist',
            'title': playlist_title,
            'id': cat_id,
-            'entries': [self._extract_video(i) for i in videos],
+            'entries': [self._extract_video(v) for v in videos],
        }
@@ -0,0 +1,83 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    determine_ext,
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+
+
+class PromptFileIE(InfoExtractor):
+    """Extractor for promptfile.com file pages (``/l/<ID>`` URLs).
+
+    The landing page contains a form of hidden fields; its values are
+    POSTed back to the same URL to obtain the page holding the media URL.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)'
+    _FILE_NOT_FOUND_REGEX = r'<div.+id="not_found_msg".+>.+</div>[^-]'
+    _TEST = {
+        'url': 'http://www.promptfile.com/l/D21B4746E9-F01462F0FF',
+        'md5': 'd1451b6302da7215485837aaea882c4c',
+        'info_dict': {
+            'id': 'D21B4746E9-F01462F0FF',
+            'ext': 'mp4',
+            'title': 'Birds.mp4',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the file page *url*.
+
+        Raises ExtractorError (expected) when the page shows the "not found"
+        message.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
+            raise ExtractorError('Video %s does not exist' % video_id,
+                                 expected=True)
+
+        # Collect the hidden form fields and submit them back to the same URL.
+        fields = dict(re.findall(r'''(?x)type="hidden"\s+
+            name="(.+?)"\s+
+            value="(.*?)"
+            ''', webpage))
+        # urlencode() yields percent-encoded ASCII text; on Python 3 the
+        # request body has to be bytes, so encode it before sending.
+        post = compat_urllib_parse.urlencode(fields).encode('ascii')
+        req = compat_urllib_request.Request(url, post)
+        req.add_header('Content-type', 'application/x-www-form-urlencoded')
+        webpage = self._download_webpage(
+            req, video_id, 'Downloading video page')
+
+        video_url = self._html_search_regex(
+            r'url:\s*\'([^\']+)\'', webpage, 'URL')
+        title = self._html_search_regex(
+            r'<span.+title="([^"]+)">', webpage, 'title')
+        thumbnail = self._html_search_regex(
+            r'<div id="player_overlay">.*button>.*?<img src="([^"]+)"',
+            webpage, 'thumbnail', fatal=False, flags=re.DOTALL)
+
+        formats = [{
+            'format_id': 'sd',
+            'url': video_url,
+            # The extension is derived from the title (e.g. 'Birds.mp4').
+            'ext': determine_ext(title),
+        }]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..utils import parse_duration


 class RtlXlIE(InfoExtractor):
@@ -11,20 +12,15 @@ class RtlXlIE(InfoExtractor):

    _TEST = {
        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
+        'md5': 'cc16baa36a6c169391f0764fa6b16654',
        'info_dict': {
            'id': '6e4203a6-0a5e-3596-8424-c599a59e0677',
-            'ext': 'flv',
+            'ext': 'mp4',
            'title': 'RTL Nieuws - Laat',
-            'description': 'Dagelijks het laatste nieuws uit binnen- en '
-                'buitenland. Voor nog meer nieuws kunt u ook gebruikmaken van '
-                'onze mobiele apps.',
+            'description': 'md5:6b61f66510c8889923b11f2778c72dc5',
            'timestamp': 1408051800,
            'upload_date': '20140814',
-        },
-        'params': {
-            # We download the first bytes of the first fragment, it can't be
-            # processed by the f4m downloader beacuse it isn't complete
-            'skip_download': True,
+            'duration': 576.880,
        },
    }

@@ -35,17 +31,37 @@ class RtlXlIE(InfoExtractor):
        info = self._download_json(
            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
            uuid)
+
        material = info['material'][0]
        episode_info = info['episodes'][0]

-        f4m_url = 'http://manifest.us.rtl.nl' + material['videopath']
        progname = info['abstracts'][0]['name']
        subtitle = material['title'] or info['episodes'][0]['name']

+        videopath = material['videopath']
+        f4m_url = 'http://manifest.us.rtl.nl' + videopath
+
+        formats = self._extract_f4m_formats(f4m_url, uuid)
+
+        video_urlpart = videopath.split('/flash/')[1][:-4]
+        PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4'
+
+        formats.extend([
+            {
+                'url': PG_URL_TEMPLATE % ('a2m', video_urlpart),
+                'format_id': 'pg-sd',
+            },
+            {
+                'url': PG_URL_TEMPLATE % ('a3m', video_urlpart),
+                'format_id': 'pg-hd',
+            }
+        ])
+
        return {
            'id': uuid,
-            'title': '%s - %s' % (progname, subtitle), 
-            'formats': self._extract_f4m_formats(f4m_url, uuid),
+            'title': '%s - %s' % (progname, subtitle),
+            'formats': formats,
            'timestamp': material['original_date'],
            'description': episode_info['synopsis'],
+            'duration': parse_duration(material.get('duration')),
        }
@@ -74,6 +74,13 @@ class RutubeChannelIE(InfoExtractor):
    IE_NAME = 'rutube:channel'
    IE_DESC = 'Rutube channels'
    _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://rutube.ru/tags/video/1800/',
+        'info_dict': {
+            'id': '1800',
+        },
+        'playlist_mincount': 68,
+    }]

    _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'

@@ -101,6 +108,7 @@ class RutubeMovieIE(RutubeChannelIE):
    IE_NAME = 'rutube:movie'
    IE_DESC = 'Rutube movies'
    _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)'
+    _TESTS = []

    _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
    _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
@@ -119,5 +127,12 @@ class RutubePersonIE(RutubeChannelIE):
    IE_NAME = 'rutube:person'
    IE_DESC = 'Rutube person videos'
    _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://rutube.ru/video/person/313878/',
+        'info_dict': {
+            'id': '313878',
+        },
+        'playlist_mincount': 37,
+    }]

    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
@@ -267,6 +267,14 @@ class SmotriCommunityIE(InfoExtractor):
    IE_DESC = 'Smotri.com community videos'
    IE_NAME = 'smotri:community'
    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
+    _TEST = {
+        'url': 'http://smotri.com/community/video/kommuna',
+        'info_dict': {
+            'id': 'kommuna',
+            'title': 'КПРФ',
+        },
+        'playlist_mincount': 4,
+    }
    
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -289,6 +297,14 @@ class SmotriUserIE(InfoExtractor):
    IE_DESC = 'Smotri.com user videos'
    IE_NAME = 'smotri:user'
    _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
+    _TESTS = [{
+        'url': 'http://smotri.com/user/inspector',
+        'info_dict': {
+            'id': 'inspector',
+            'title': 'Inspector',
+        },
+        'playlist_mincount': 9,
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -61,7 +61,10 @@ class SockshareIE(InfoExtractor):
            r'<a href="([^"]*)".+class="download_file_link"',
            webpage, 'file url')
        video_url = "http://www.sockshare.com" + video_url
-        title = self._html_search_regex(r'<h1>(.+)<strong>', webpage, 'title')
+        title = self._html_search_regex((
+            r'<h1>(.+)<strong>',
+            r'var name = "([^"]+)";'),
+            webpage, 'title', default=None)
        thumbnail = self._html_search_regex(
            r'<img\s+src="([^"]*)".+?name="bg"',
            webpage, 'thumbnail')
@@ -28,7 +28,8 @@ class SoundcloudIE(InfoExtractor):
    _VALID_URL = r'''(?x)^(?:https?://)?
                    (?:(?:(?:www\.|m\.)?soundcloud\.com/
                            (?P<uploader>[\w\d-]+)/
-                            (?!sets/)(?P<title>[\w\d-]+)/?
+                            (?!sets/|likes/?(?:$|[?#]))
+                            (?P<title>[\w\d-]+)/?
                            (?P<token>[^?]+?)?(?:[?].*)?$)
                       |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
                       |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
@@ -221,13 +222,16 @@ class SoundcloudIE(InfoExtractor):
 class SoundcloudSetIE(SoundcloudIE):
    _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
    IE_NAME = 'soundcloud:set'
-    # it's in tests/test_playlists.py
-    _TESTS = []
+    _TESTS = [{
+        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
+        'info_dict': {
+            'title': 'The Royal Concept EP',
+        },
+        'playlist_mincount': 6,
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)

        # extract uploader (which is in the url)
        uploader = mobj.group(1)
@@ -246,20 +250,32 @@ class SoundcloudSetIE(SoundcloudIE):
                self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message']))
            return

-        self.report_extraction(full_title)
-        return {'_type': 'playlist',
-                'entries': [self._extract_info_dict(track) for track in info['tracks']],
-                'id': info['id'],
-                'title': info['title'],
-                }
+        return {
+            '_type': 'playlist',
+            'entries': [self._extract_info_dict(track) for track in info['tracks']],
+            'id': info['id'],
+            'title': info['title'],
+        }


 class SoundcloudUserIE(SoundcloudIE):
    _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
    IE_NAME = 'soundcloud:user'
-
-    # it's in tests/test_playlists.py
-    _TESTS = []
+    _TESTS = [{
+        'url': 'https://soundcloud.com/the-concept-band',
+        'info_dict': {
+            'id': '9615865',
+            'title': 'The Royal Concept',
+        },
+        'playlist_mincount': 12
+    }, {
+        'url': 'https://soundcloud.com/the-concept-band/likes',
+        'info_dict': {
+            'id': '9615865',
+            'title': 'The Royal Concept',
+        },
+        'playlist_mincount': 1,
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -301,9 +317,18 @@ class SoundcloudUserIE(SoundcloudIE):
 class SoundcloudPlaylistIE(SoundcloudIE):
    _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)'
    IE_NAME = 'soundcloud:playlist'
+    _TESTS = [

-     # it's in tests/test_playlists.py
-    _TESTS = []
+        {
+            'url': 'http://api.soundcloud.com/playlists/4110309',
+            'info_dict': {
+                'id': '4110309',
+                'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
+                'description': 're:.*?TILT Brass - Bowery Poetry Club',
+            },
+            'playlist_count': 6,
+        }
+    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -0,0 +1,107 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_request,
+    parse_iso8601,
+)
+
+
+class SportDeutschlandIE(InfoExtractor):
+    """Extractor for sportdeutschland.tv video and live stream pages.
+
+    Metadata and stream locations are read from the splink.tv permalink
+    API, addressed by the sport and video slugs found in the page URL.
+    """
+
+    _VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
+    _TESTS = [{
+        'url': 'http://sportdeutschland.tv/badminton/live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
+        'info_dict': {
+            'id': 'live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
+            'ext': 'mp4',
+            'title': 'LIVE: Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen',
+            'categories': ['Badminton'],
+            'view_count': int,
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 're:^Die Badminton-WM 2014 aus Kopenhagen LIVE',
+            'timestamp': int,
+            'upload_date': 're:^201408[23][0-9]$',
+        },
+        'params': {
+            'skip_download': 'Live stream',
+        },
+    }, {
+        'url': 'http://sportdeutschland.tv/li-ning-badminton-wm-2014/lee-li-ning-badminton-weltmeisterschaft-2014-kopenhagen-herren-einzel-wei-vs',
+        'info_dict': {
+            'id': 'lee-li-ning-badminton-weltmeisterschaft-2014-kopenhagen-herren-einzel-wei-vs',
+            'ext': 'mp4',
+            'upload_date': '20140825',
+            'description': 'md5:60a20536b57cee7d9a4ec005e8687504',
+            'timestamp': 1408976060,
+            'title': 'Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen: Herren Einzel, Wei Lee vs. Keun Lee',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'view_count': int,
+            'categories': ['Li-Ning Badminton WM 2014'],
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Query the permalink API for *url* and return the info dict."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        sport_id = mobj.group('sport')
+
+        api_url = 'http://splink.tv/api/permalinks/%s/%s' % (
+            sport_id, video_id)
+        req = compat_urllib_request.Request(api_url, headers={
+            'Accept': 'application/vnd.vidibus.v2.html+json',
+            'Referer': url,
+        })
+        data = self._download_json(req, video_id)
+
+        # Tolerate a missing or null "section" / "tags" entry in the reply.
+        tags = (data.get('section') or {}).get('tags') or {}
+        categories = list(tags.values())
+        asset = data['asset']
+
+        formats = []
+        smil_url = asset['video']
+        if '.smil' in smil_url:
+            # The HLS playlist URL is the SMIL URL with the extension swapped.
+            m3u8_url = smil_url.replace('.smil', '.m3u8')
+            formats.extend(
+                self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4'))
+
+            smil_doc = self._download_xml(
+                smil_url, video_id, note='Downloading SMIL metadata')
+            base_url = smil_doc.find('./head/meta').attrib['base']
+            # One low-priority RTMP format per <video> node of the SMIL body.
+            formats.extend([{
+                'format_id': 'rtmp',
+                'url': base_url,
+                'play_path': n.attrib['src'],
+                'ext': 'flv',
+                'preference': -100,
+                'format_note': 'Seems to fail at example stream',
+            } for n in smil_doc.findall('./body/video')])
+        else:
+            # No SMIL manifest: use the asset URL directly.
+            formats.append({'url': smil_url})
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': asset['title'],
+            'thumbnail': asset.get('image'),
+            'description': asset.get('teaser'),
+            'categories': categories,
+            'view_count': asset.get('views'),
+            'rtmp_live': asset.get('live'),
+            'timestamp': parse_iso8601(asset.get('date')),
+        }
@@ -106,6 +106,13 @@ class TeacherTubeUserIE(InfoExtractor):
        \s*
        <a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)"
    '''
+    _TEST = {
+        'url': 'http://www.teachertube.com/user/profile/rbhagwati2',
+        'info_dict': {
+            'id': 'rbhagwati2'
+        },
+        'playlist_mincount': 179,
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -51,7 +51,6 @@ class TEDIE(SubtitlesInfoExtractor):
        }
    }, {
        'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
-        'md5': '49144e345a899b8cb34d315f3b9cfeeb',
        'info_dict': {
            'id': '1972',
            'ext': 'mp4',
@@ -59,6 +58,13 @@ class TEDIE(SubtitlesInfoExtractor):
            'uploader': 'Gabby Giffords and Mark Kelly',
            'description': 'md5:5174aed4d0f16021b704120360f72b92',
        },
+    }, {
+        'url': 'http://www.ted.com/playlists/who_are_the_hackers',
+        'info_dict': {
+            'id': '10',
+            'title': 'Who are the hackers?',
+        },
+        'playlist_mincount': 6,
    }]

    _NATIVE_FORMATS = {
@@ -42,6 +42,13 @@ class ToypicsIE(InfoExtractor):
 class ToypicsUserIE(InfoExtractor):
    IE_DESC = 'Toypics user profile'
    _VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
+    _TEST = {
+        'url': 'http://videos.toypics.net/Mikey',
+        'info_dict': {
+            'id': 'Mikey',
+        },
+        'playlist_mincount': 9917,
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals

-import json
 import re

 from .common import InfoExtractor
@@ -68,21 +67,36 @@ class UstreamIE(InfoExtractor):
 class UstreamChannelIE(InfoExtractor):
    _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
    IE_NAME = 'ustream:channel'
+    _TEST = {
+        'url': 'http://www.ustream.tv/channel/channeljapan',
+        'info_dict': {
+            'id': '10874166',
+        },
+        'playlist_mincount': 54,
+    }

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
-        slug = m.group('slug')
-        webpage = self._download_webpage(url, slug)
+        display_id = m.group('slug')
+        webpage = self._download_webpage(url, display_id)
        channel_id = get_meta_content('ustream:channel_id', webpage)

        BASE = 'http://www.ustream.tv'
        next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
        video_ids = []
        while next_url:
-            reply = json.loads(self._download_webpage(compat_urlparse.urljoin(BASE, next_url), channel_id))
+            reply = self._download_json(
+                compat_urlparse.urljoin(BASE, next_url), display_id,
+                note='Downloading video information (next: %d)' % (len(video_ids) + 1))
            video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data']))
            next_url = reply['nextUrl']

-        urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids]
-        url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls]
-        return self.playlist_result(url_entries, channel_id)
+        entries = [
+            self.url_result('http://www.ustream.tv/recorded/' + vid, 'Ustream')
+            for vid in video_ids]
+        return {
+            '_type': 'playlist',
+            'id': channel_id,
+            'display_id': display_id,
+            'entries': entries,
+        }
@@ -16,8 +16,9 @@ class VeeHDIE(InfoExtractor):

    _TEST = {
        'url': 'http://veehd.com/video/4686958',
-        'file': '4686958.mp4',
        'info_dict': {
+            'id': '4686958',
+            'ext': 'mp4',
            'title': 'Time Lapse View from Space ( ISS)',
            'uploader_id': 'spotted',
            'description': 'md5:f0094c4cf3a72e22bc4e4239ef767ad7',
@@ -57,6 +57,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
        (?P<proto>(?:https?:)?//)?
        (?:(?:www|(?P<player>player))\.)?
        vimeo(?P<pro>pro)?\.com/
+        (?!channels/[^/?#]+/?(?:$|[?#])|album/)
        (?:.*?/)?
        (?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)?
        (?:videos?/)?
@@ -151,30 +152,8 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
                'duration': 62,
            }
        },
-        {
-            'note': 'video player needs Referer',
-            'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053',
-            'md5': '6295fdab8f4bf6a002d058b2c6dce276',
-            'info_dict': {
-                'id': '91613211',
-                'ext': 'mp4',
-                'title': 'Death by dogma versus assembling agile - Sander Hoogendoorn',
-                'uploader': 'DevWeek Events',
-                'duration': 2773,
-                'thumbnail': 're:^https?://.*\.jpg$',
-            }
-        }
    ]

-    @classmethod
-    def suitable(cls, url):
-        if VimeoChannelIE.suitable(url):
-            # Otherwise channel urls like http://vimeo.com/channels/31259 would
-            # match
-            return False
-        else:
-            return super(VimeoIE, cls).suitable(url)
-
    def _verify_video_password(self, url, video_id, webpage):
        password = self._downloader.params.get('videopassword', None)
        if password is None:
@@ -393,9 +372,16 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):

 class VimeoChannelIE(InfoExtractor):
    IE_NAME = 'vimeo:channel'
-    _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)/?(\?.*)?$'
+    _VALID_URL = r'https?://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
    _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
    _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
+    _TESTS = [{
+        'url': 'http://vimeo.com/channels/tributes',
+        'info_dict': {
+            'title': 'Vimeo Tributes',
+        },
+        'playlist_mincount': 25,
+    }]

    def _page_url(self, base_url, pagenum):
        return '%s/videos/page:%d/' % (base_url, pagenum)
@@ -429,14 +415,15 @@ class VimeoChannelIE(InfoExtractor):

 class VimeoUserIE(VimeoChannelIE):
    IE_NAME = 'vimeo:user'
-    _VALID_URL = r'(?:https?://)?vimeo\.com/(?P<name>[^/]+)(?:/videos|[#?]|$)'
+    _VALID_URL = r'https?://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
    _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
-
-    @classmethod
-    def suitable(cls, url):
-        if VimeoChannelIE.suitable(url) or VimeoIE.suitable(url) or VimeoAlbumIE.suitable(url) or VimeoGroupsIE.suitable(url):
-            return False
-        return super(VimeoUserIE, cls).suitable(url)
+    _TESTS = [{
+        'url': 'http://vimeo.com/nkistudio/videos',
+        'info_dict': {
+            'title': 'Nki',
+        },
+        'playlist_mincount': 66,
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -446,8 +433,15 @@ class VimeoUserIE(VimeoChannelIE):

 class VimeoAlbumIE(VimeoChannelIE):
    IE_NAME = 'vimeo:album'
-    _VALID_URL = r'(?:https?://)?vimeo\.com/album/(?P<id>\d+)'
+    _VALID_URL = r'https?://vimeo\.com/album/(?P<id>\d+)'
    _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
+    _TESTS = [{
+        'url': 'http://vimeo.com/album/2632481',
+        'info_dict': {
+            'title': 'Staff Favorites: November 2013',
+        },
+        'playlist_mincount': 13,
+    }]

    def _page_url(self, base_url, pagenum):
        return '%s/page:%d/' % (base_url, pagenum)
@@ -461,6 +455,13 @@ class VimeoAlbumIE(VimeoChannelIE):
 class VimeoGroupsIE(VimeoAlbumIE):
    IE_NAME = 'vimeo:group'
    _VALID_URL = r'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)'
+    _TESTS = [{
+        'url': 'http://vimeo.com/groups/rolexawards',
+        'info_dict': {
+            'title': 'Rolex Awards for Enterprise',
+        },
+        'playlist_mincount': 73,
+    }]

    def _extract_list_title(self, webpage):
        return self._og_search_title(webpage)
@@ -474,8 +475,8 @@ class VimeoGroupsIE(VimeoAlbumIE):
 class VimeoReviewIE(InfoExtractor):
    IE_NAME = 'vimeo:review'
    IE_DESC = 'Review pages on vimeo'
-    _VALID_URL = r'(?:https?://)?vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
-    _TEST = {
+    _VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
+    _TESTS = [{
        'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
        'file': '75524534.mp4',
        'md5': 'c507a72f780cacc12b2248bb4006d253',
@@ -483,7 +484,19 @@ class VimeoReviewIE(InfoExtractor):
            'title': "DICK HARDWICK 'Comedian'",
            'uploader': 'Richard Hardwick',
        }
-    }
+    }, {
+        'note': 'video player needs Referer',
+        'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053',
+        'md5': '6295fdab8f4bf6a002d058b2c6dce276',
+        'info_dict': {
+            'id': '91613211',
+            'ext': 'mp4',
+            'title': 'Death by dogma versus assembling agile - Sander Hoogendoorn',
+            'uploader': 'DevWeek Events',
+            'duration': 2773,
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -498,6 +511,10 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
    _VALID_URL = r'https?://vimeo\.com/home/watchlater|:vimeowatchlater'
    _LOGIN_REQUIRED = True
    _TITLE_RE = r'href="/home/watchlater".*?>(.*?)<'
+    _TESTS = [{
+        'url': 'http://vimeo.com/home/watchlater',
+        'only_matching': True,
+    }]

    def _real_initialize(self):
        self._login()
@@ -65,6 +65,13 @@ class VineUserIE(InfoExtractor):
    IE_NAME = 'vine:user'
    _VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$'
    _VINE_BASE_URL = "https://vine.co/"
+    _TEST = {
+        'url': 'https://vine.co/Visa',
+        'info_dict': {
+            'id': 'Visa',
+        },
+        'playlist_mincount': 47,
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -2,32 +2,43 @@
 from __future__ import unicode_literals

 import re
-import time
 import hashlib

 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    unified_strdate,
-)
+from ..utils import unified_strdate


 class WatIE(InfoExtractor):
    _VALID_URL = r'http://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html'
    IE_NAME = 'wat.tv'
-    _TEST = {
-        'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
-        'md5': 'ce70e9223945ed26a8056d413ca55dc9',
-        'info_dict': {
-            'id': '11713067',
-            'display_id': 'soupe-figues-l-orange-aux-epices',
-            'ext': 'mp4',
-            'title': 'Soupe de figues à l\'orange et aux épices',
-            'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
-            'upload_date': '20140819',
-            'duration': 120,
+    _TESTS = [
+        {
+            'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
+            'md5': 'ce70e9223945ed26a8056d413ca55dc9',
+            'info_dict': {
+                'id': '11713067',
+                'display_id': 'soupe-figues-l-orange-aux-epices',
+                'ext': 'mp4',
+                'title': 'Soupe de figues à l\'orange et aux épices',
+                'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
+                'upload_date': '20140819',
+                'duration': 120,
+            },
        },
-    }
+        {
+            'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
+            'md5': 'fbc84e4378165278e743956d9c1bf16b',
+            'info_dict': {
+                'id': '11713075',
+                'display_id': 'gregory-lemarchal-voix-ange',
+                'ext': 'mp4',
+                'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
+                'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3',
+                'upload_date': '20140816',
+                'duration': 2910,
+            },
+        },
+    ]

    def download_video_info(self, real_id):
        # 'contentv4' is used in the website, but it also returns the related
@@ -46,9 +57,8 @@ class WatIE(InfoExtractor):

        video_info = self.download_video_info(real_id)

-        if video_info.get('geolock'):
-            self.report_warning(
-                'This content is marked as not available in your area. Trying anyway ..')
+        geo_list = video_info.get('geoList')
+        country = geo_list[0] if geo_list else ''

        chapters = video_info['chapters']
        first_chapter = chapters[0]
@@ -83,14 +93,16 @@ class WatIE(InfoExtractor):
            fmts.append(('HD', 'webhd'))

        def compute_token(param):
-            timestamp = '%08x' % int(time.time())
+            timestamp = '%08x' % int(self._download_webpage(
+                'http://www.wat.tv/servertime', real_id,
+                'Downloading server time').split('|')[0])
            magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564'
            return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp)

        for fmt in fmts:
            webid = '/%s/%s' % (fmt[1], real_id)
            video_url = self._download_webpage(
-                'http://www.wat.tv/get%s?token=%s&getURL=1' % (webid, compute_token(webid)),
+                'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country),
                real_id,
                'Downloding %s video URL' % fmt[0],
                'Failed to download %s video URL' % fmt[0],
@@ -77,9 +77,17 @@ class XTubeIE(InfoExtractor):
            'age_limit': 18,
        }

+
 class XTubeUserIE(InfoExtractor):
    IE_DESC = 'XTube user profile'
    _VALID_URL = r'https?://(?:www\.)?xtube\.com/community/profile\.php\?(.*?)user=(?P<username>[^&#]+)(?:$|[&#])'
+    _TEST = {
+        'url': 'http://www.xtube.com/community/profile.php?user=greenshowers',
+        'info_dict': {
+            'id': 'greenshowers',
+        },
+        'playlist_mincount': 155,
+    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -71,7 +71,8 @@ class YahooIE(InfoExtractor):
        if items_json is None:
            CONTENT_ID_REGEXES = [
                r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
-                r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"'
+                r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
+                r'"first_videoid"\s*:\s*"([^"]+)"',
            ]
            long_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID')
            video_id = long_id
@@ -9,6 +9,7 @@ from .ffmpeg import (
    FFmpegEmbedSubtitlePP,
 )
 from .xattrpp import XAttrMetadataPP
+from .execafterdownload import ExecAfterDownloadPP

 __all__ = [
    'AtomicParsleyPP',
@@ -19,4 +20,5 @@ __all__ = [
    'FFmpegExtractAudioPP',
    'FFmpegEmbedSubtitlePP',
    'XAttrMetadataPP',
+    'ExecAfterDownloadPP',
 ]
@@ -0,0 +1,31 @@
+from __future__ import unicode_literals
+
+import subprocess
+
+from .common import PostProcessor
+from ..utils import (
+    shlex_quote,
+    PostProcessingError,
+)
+
+
+class ExecAfterDownloadPP(PostProcessor):
+    def __init__(self, downloader=None, verboseOutput=None, exec_cmd=None):
+        self.verboseOutput = verboseOutput
+        self.exec_cmd = exec_cmd
+
+    def run(self, information):
+        cmd = self.exec_cmd
+        if not '{}' in cmd:
+            cmd += ' {}'
+
+        cmd = cmd.replace('{}', shlex_quote(information['filepath']))
+
+        self._downloader.to_screen("[exec] Executing command: %s" % cmd)
+        retCode = subprocess.call(cmd, shell=True)
+        if retCode != 0:
+            raise PostProcessingError(
+                'Command returned error code %d' % retCode)
+
+        return None, information  # by default, keep file and do nothing
+
@@ -192,6 +192,13 @@ try:
 except ImportError:  # Python 2.6
    from xml.parsers.expat import ExpatError as compat_xml_parse_error

+try:
+    from shlex import quote as shlex_quote
+except ImportError:  # Python < 3.3
+    def shlex_quote(s):
+        return "'" + s.replace("'", "'\"'\"'") + "'"
+
+
 def compat_ord(c):
    if type(c) is int: return c
    else: return ord(c)
@@ -291,30 +298,6 @@ def xpath_with_ns(path, ns_map):
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)

-def htmlentity_transform(matchobj):
-    """Transforms an HTML entity to a character.
-
-    This function receives a match object and is intended to be used with
-    the re.sub() function.
-    """
-    entity = matchobj.group(1)
-
-    # Known non-numeric HTML entity
-    if entity in compat_html_entities.name2codepoint:
-        return compat_chr(compat_html_entities.name2codepoint[entity])
-
-    mobj = re.match(u'(?u)#(x?\\d+)', entity)
-    if mobj is not None:
-        numstr = mobj.group(1)
-        if numstr.startswith(u'x'):
-            base = 16
-            numstr = u'0%s' % numstr
-        else:
-            base = 10
-        return compat_chr(int(numstr, base))
-
-    # Unknown entity in name, return its literal representation
-    return (u'&%s;' % entity)

 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
 class BaseHTMLParser(compat_html_parser.HTMLParser):
@@ -536,13 +519,33 @@ def orderedSet(iterable):
    return res


+def _htmlentity_transform(entity):
+    """Transforms an HTML entity to a character."""
+    # Known non-numeric HTML entity
+    if entity in compat_html_entities.name2codepoint:
+        return compat_chr(compat_html_entities.name2codepoint[entity])
+
+    mobj = re.match(r'#(x?[0-9]+)', entity)
+    if mobj is not None:
+        numstr = mobj.group(1)
+        if numstr.startswith(u'x'):
+            base = 16
+            numstr = u'0%s' % numstr
+        else:
+            base = 10
+        return compat_chr(int(numstr, base))
+
+    # Unknown entity in name, return its literal representation
+    return (u'&%s;' % entity)
+
+
 def unescapeHTML(s):
    if s is None:
        return None
    assert type(s) == compat_str

-    result = re.sub(r'(?u)&(.+?);', htmlentity_transform, s)
-    return result
+    return re.sub(
+        r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)


 def encodeFilename(s, for_subprocess=False):
@@ -759,10 +762,9 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
        return ret

    def http_request(self, req):
-        for h,v in std_headers.items():
-            if h in req.headers:
-                del req.headers[h]
-            req.add_header(h, v)
+        for h, v in std_headers.items():
+            if h not in req.headers:
+                req.add_header(h, v)
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
@@ -1331,7 +1333,7 @@ def parse_duration(s):
        return None

    m = re.match(
-        r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?$', s)
+        r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?(?P<ms>\.[0-9]+)?$', s)
    if not m:
        return None
    res = int(m.group('secs'))
@@ -1339,6 +1341,8 @@ def parse_duration(s):
        res += int(m.group('mins')) * 60
        if m.group('hours'):
            res += int(m.group('hours')) * 60 * 60
+    if m.group('ms'):
+        res += float(m.group('ms'))
    return res


@@ -1449,6 +1453,12 @@ def urlencode_postdata(*args, **kargs):
    return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')


+try:
+    etree_iter = xml.etree.ElementTree.Element.iter
+except AttributeError:  # Python <=2.6
+    etree_iter = lambda n: n.findall('.//*')
+
+
 def parse_xml(s):
    class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
@@ -1456,7 +1466,14 @@ def parse_xml(s):

    parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
-    return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
+    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
+    # Fix up XML parser in Python 2.x
+    if sys.version_info < (3, 0):
+        for n in etree_iter(tree):
+            if n.text is not None:
+                if not isinstance(n.text, compat_str):
+                    n.text = n.text.decode('utf-8')
+    return tree


 if sys.version_info < (3, 0) and sys.platform == 'win32':
@@ -1,2 +1,2 @@

-__version__ = '2014.08.25'
+__version__ = '2014.08.29'
Author	SHA1	Message	Date
Philipp Hagemeister	37709fae89	release 2014.08.29	2014-08-29 01:07:30 +02:00
Philipp Hagemeister	a81e4eb69d	[promptfile] Remove quality=1 and leave it to default	2014-08-29 01:07:18 +02:00
Naglis Jonaitis	8e72edfb19	[promptfile] Add new extractor	2014-08-29 00:20:10 +03:00
Philipp Hagemeister	863f08a92e	release 2014.08.28.2	2014-08-28 18:03:29 +02:00
Sergey M․	de2d9f5f1b	[rtlnl] Add support for progressive videos (Closes #3603 )	2014-08-28 22:54:06 +07:00
Philipp Hagemeister	a520c11241	release 2014.08.28.1	2014-08-28 14:01:01 +02:00
Philipp Hagemeister	b94744d157	[dropbox] Make sure ?dl=0 is ignored (Fixes #3605 )	2014-08-28 14:00:55 +02:00
Philipp Hagemeister	753727cded	[test_download] Expect a minimum file size This should detect when we're downloading a small text file by accident.	2014-08-28 13:59:45 +02:00
Philipp Hagemeister	daebaab692	[extractor/common] Correct typo	2014-08-28 13:04:49 +02:00
Philipp Hagemeister	3524cc25ca	[sportdeutschland] Add support for more plain videos	2014-08-28 10:55:32 +02:00
Philipp Hagemeister	29a7e1f261	release 2014.08.28	2014-08-28 01:38:15 +02:00
Philipp Hagemeister	df53b6358d	[veehd] Modernize	2014-08-28 01:37:57 +02:00
Philipp Hagemeister	f1a9d64eea	[extractor/common] Modernize	2014-08-28 01:04:43 +02:00
Philipp Hagemeister	27f774862d	Merge remote-tracking branch 'origin/master'	2014-08-28 01:01:04 +02:00
Philipp Hagemeister	b759a0d4d2	fix up imports	2014-08-28 01:00:59 +02:00
Philipp Hagemeister	22a6f15061	Move playlist tests to extractors. From now on, test_download will run these tests. That means we benefit not only from the networking setup in there, but also from the other tests (for example test_all_urls to find problems with _VALID_URLs).	2014-08-28 00:58:24 +02:00
Philipp Hagemeister	259454525f	[nhl] Modernize	2014-08-27 19:52:55 +02:00
Philipp Hagemeister	ce6815aad3	[dailymotion] Correct test case	2014-08-27 19:20:20 +02:00
Philipp Hagemeister	4e408e479b	[utils] Modernize tests	2014-08-27 19:11:45 +02:00
Philipp Hagemeister	a9d3890fcb	[ted] Remove md5sum from test This failed multiple times, and the md5sum is not that important anyways.	2014-08-27 17:12:13 +02:00
Philipp Hagemeister	2857084000	[test_download] Improve playlist handling	2014-08-27 17:11:45 +02:00
Jaime Marquínez Ferrándiz	7798fad535	[downloader/rtmp] Use check_executable	2014-08-27 15:56:27 +02:00
Jaime Marquínez Ferrándiz	baf2907501	[downloader/hls] Return False if ffmpeg or avconv couldn't be found	2014-08-27 15:50:47 +02:00
Jaime Marquínez Ferrándiz	3c765c68c4	[downloader/hls] Use check_executable	2014-08-27 15:49:07 +02:00
Jaime Marquínez Ferrándiz	29153f49b3	[downloader/hls] Use the correct program when reporting an error	2014-08-27 15:40:02 +02:00
Sergey M․	071a236c5a	[yahoo] Add one more pattern for content id	2014-08-27 18:59:28 +07:00
Sergey M․	10437550f4	[cbs] Update test video id	2014-08-27 18:41:24 +07:00
Philipp Hagemeister	2929b3e71d	[vimeo] Move all testcases to extractors and clean up Previously, these extractors all defined their own suitable methods. Now test_all_urls tests that everything is in order, automatically :)	2014-08-27 11:36:02 +02:00
Philipp Hagemeister	22449fa624	Improve test_all_urls output	2014-08-27 11:35:43 +02:00
Philipp Hagemeister	d36d3f420c	[vimeo] Move test case to where it belongs	2014-08-27 11:13:42 +02:00
Philipp Hagemeister	60ac04e57c	[facebook] Match video.php URLs	2014-08-27 11:08:47 +02:00
Philipp Hagemeister	a7680bf330	release 2014.08.27.1	2014-08-27 02:37:23 +02:00
Philipp Hagemeister	6d3d3fc083	[ard] Add support for plain ARD downloads (Fixes #3546 )	2014-08-27 02:36:57 +02:00
Philipp Hagemeister	aff216edf4	[generic] Prevent <video> search from skipping over empty sources (#3546 )	2014-08-27 02:09:59 +02:00
Philipp Hagemeister	1cb6dcdbbe	[generic] Do not download images as videos by accident	2014-08-27 02:07:11 +02:00
Philipp Hagemeister	3f514a353e	release 2014.08.27	2014-08-27 01:44:54 +02:00
Philipp Hagemeister	da9ec3b932	[musicvault] Add extractor (Fixes #3593 )	2014-08-27 01:44:47 +02:00
Philipp Hagemeister	191b7cbba9	[mfs] Modernize	2014-08-27 01:04:32 +02:00
Philipp Hagemeister	e8c59b9642	release 2014.08.26	2014-08-26 21:30:52 +02:00
Philipp Hagemeister	6abb066128	[sockshare] Fix title extraction (Fixes #3592 )	2014-08-26 21:30:30 +02:00
Philipp Hagemeister	8f1ea7cbb6	[empflix] Revert to XML parser Don't rely on the XML being broken (if they fix it, our code wouldn't work anymore). Instead, use the transform function we already have :) This partially reverts commit `c7bee2a725`.	2014-08-26 15:51:42 +02:00
Jaime Marquínez Ferrándiz	a204c85408	[ign] Fix extraction of video in articles	2014-08-26 15:38:29 +02:00
Sergey M․	15a1f4b8fe	[empflix] Extract thumbnail	2014-08-26 20:10:36 +07:00
Sergey M․	c7bee2a725	[empflix] Adapt to malformed config XML	2014-08-26 20:07:28 +07:00
Jaime Marquínez Ferrándiz	dbc1366b50	[mixcloud] Use a HEAD request when checking if the url is valid	2014-08-26 14:55:15 +02:00
Philipp Hagemeister	704df56da7	[sportdeutschland] add new extractor	2014-08-26 12:51:13 +02:00
Philipp Hagemeister	33ac271ba7	[utils] Let request headers override standard headers What was I thinking when writing this?	2014-08-26 11:51:48 +02:00
Philipp Hagemeister	0963f92f23	[eighttracks] modernize	2014-08-26 11:31:23 +02:00
Philipp Hagemeister	9a66c1079c	release 2014.08.25.3	2014-08-25 18:38:10 +02:00
Philipp Hagemeister	f971dcbba0	Merge branch 'master' of github.com:rg3/youtube-dl	2014-08-25 18:36:42 +02:00
Philipp Hagemeister	0990305d2a	[generic] Fix rss under Python 2.x and move test to extractor	2014-08-25 18:03:01 +02:00
Jaime Marquínez Ferrándiz	bcc069a937	[generic] Remove debug statement	2014-08-25 17:21:58 +02:00
Jaime Marquínez Ferrándiz	34708e1bb6	[bliptv] Remove superfluous characters in _VALID_URL regex	2014-08-25 17:16:11 +02:00
Philipp Hagemeister	829476b80a	[googlesearch] Move test to extractor	2014-08-25 17:02:52 +02:00
Philipp Hagemeister	1dd70fe330	release 2014.08.25.2	2014-08-25 16:52:28 +02:00
Philipp Hagemeister	067e922295	release 2014.08.25.1	2014-08-25 16:41:05 +02:00
Sergey M․	c28df2478f	[wat] Use server time and pass country argument (Closes #3579 )	2014-08-25 20:21:33 +07:00
Philipp Hagemeister	241f7a8ade	Merge remote-tracking branch 'JGjorgji/fix-leading-zeroes'	2014-08-25 13:59:19 +02:00
Philipp Hagemeister	b252735910	[extractor/common] Generate better f4m format IDs	2014-08-25 13:03:08 +02:00
Philipp Hagemeister	7adcbe7594	[rtlnl] Extract duration	2014-08-25 12:59:53 +02:00
Philipp Hagemeister	8d31fa3cce	[execafterdownload] Simplify (#3569 )	2014-08-25 10:18:01 +02:00
Philipp Hagemeister	1f06864e9a	[wat] Remove unused import	2014-08-25 10:15:32 +02:00
Philipp Hagemeister	348ae0a79e	Merge remote-tracking branch 'mcd1992/exec_after_download'	2014-08-25 09:44:11 +02:00
mcd1992	7833d941bb	Rebased with upstream/master	2014-08-24 15:04:50 -05:00
mcd1992	a2360a4c80	Moved from os.system to subprocess.call	2014-08-24 14:38:43 -05:00
mcd1992	a7cacbca2b	Implemented --exec option.	2014-08-24 14:38:43 -05:00
Gjorgji Jankovski	c6b4132a0a	renamed for consistency	2014-08-24 18:49:04 +02:00
Gjorgji Jankovski	ad260c90ab	Filenames are padded according to the playlist length	2014-08-24 18:23:32 +02:00