release 2014.01.22.4

Merge branch 'paged-lists'
Conflicts: test/test_utils.py youtube_dl/extractor/youtube.py
2026-06-15 00:50:13 +00:00 · 2014-01-22 21:01:52 +01:00 · 2014-01-22 20:00:16 +01:00 · 2014-01-22 19:58:31 +01:00 · 2014-01-22 19:02:48 +01:00 · 2014-01-22 19:01:41 +01:00
11 changed files with 287 additions and 120 deletions
@@ -158,7 +158,9 @@ which means you can modify it, redistribute it or use it however you like.
 ## Video Format Options:
    -f, --format FORMAT        video format code, specify the order of
                               preference using slashes: "-f 22/17/18". "-f mp4"
-                               and "-f flv" are also supported
+                               and "-f flv" are also supported. You can also use
+                               the special names "best", "bestaudio", "worst",
+                               and "worstaudio"
    --all-formats              download all available video formats
    --prefer-free-formats      prefer free video formats unless a specific one
                               is requested
@@ -1,5 +1,7 @@
 #!/usr/bin/env python

+from __future__ import unicode_literals
+
 # Allow direct execution
 import os
 import sys
@@ -30,125 +32,155 @@ class TestFormatSelection(unittest.TestCase):
        ydl = YDL()
        ydl.params['prefer_free_formats'] = True
        formats = [
-            {u'ext': u'webm', u'height': 460},
-            {u'ext': u'mp4',  u'height': 460},
+            {'ext': 'webm', 'height': 460},
+            {'ext': 'mp4',  'height': 460},
        ]
-        info_dict = {u'formats': formats, u'extractor': u'test'}
+        info_dict = {'formats': formats, 'extractor': 'test'}
        yie = YoutubeIE(ydl)
        yie._sort_formats(info_dict['formats'])
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
-        self.assertEqual(downloaded[u'ext'], u'webm')
+        self.assertEqual(downloaded['ext'], 'webm')

        # Different resolution => download best quality (mp4)
        ydl = YDL()
        ydl.params['prefer_free_formats'] = True
        formats = [
-            {u'ext': u'webm', u'height': 720},
-            {u'ext': u'mp4', u'height': 1080},
+            {'ext': 'webm', 'height': 720},
+            {'ext': 'mp4', 'height': 1080},
        ]
-        info_dict[u'formats'] = formats
+        info_dict['formats'] = formats
        yie = YoutubeIE(ydl)
        yie._sort_formats(info_dict['formats'])
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
-        self.assertEqual(downloaded[u'ext'], u'mp4')
+        self.assertEqual(downloaded['ext'], 'mp4')

        # No prefer_free_formats => prefer mp4 and flv for greater compatibility
        ydl = YDL()
        ydl.params['prefer_free_formats'] = False
        formats = [
-            {u'ext': u'webm', u'height': 720},
-            {u'ext': u'mp4', u'height': 720},
-            {u'ext': u'flv', u'height': 720},
+            {'ext': 'webm', 'height': 720},
+            {'ext': 'mp4', 'height': 720},
+            {'ext': 'flv', 'height': 720},
        ]
-        info_dict[u'formats'] = formats
+        info_dict['formats'] = formats
        yie = YoutubeIE(ydl)
        yie._sort_formats(info_dict['formats'])
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
-        self.assertEqual(downloaded[u'ext'], u'mp4')
+        self.assertEqual(downloaded['ext'], 'mp4')

        ydl = YDL()
        ydl.params['prefer_free_formats'] = False
        formats = [
-            {u'ext': u'flv', u'height': 720},
-            {u'ext': u'webm', u'height': 720},
+            {'ext': 'flv', 'height': 720},
+            {'ext': 'webm', 'height': 720},
        ]
-        info_dict[u'formats'] = formats
+        info_dict['formats'] = formats
        yie = YoutubeIE(ydl)
        yie._sort_formats(info_dict['formats'])
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
-        self.assertEqual(downloaded[u'ext'], u'flv')
+        self.assertEqual(downloaded['ext'], 'flv')

    def test_format_limit(self):
        formats = [
-            {u'format_id': u'meh', u'url': u'http://example.com/meh', 'preference': 1},
-            {u'format_id': u'good', u'url': u'http://example.com/good', 'preference': 2},
-            {u'format_id': u'great', u'url': u'http://example.com/great', 'preference': 3},
-            {u'format_id': u'excellent', u'url': u'http://example.com/exc', 'preference': 4},
+            {'format_id': 'meh', 'url': 'http://example.com/meh', 'preference': 1},
+            {'format_id': 'good', 'url': 'http://example.com/good', 'preference': 2},
+            {'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3},
+            {'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4},
        ]
        info_dict = {
-            u'formats': formats, u'extractor': u'test', 'id': 'testvid'}
+            'formats': formats, 'extractor': 'test', 'id': 'testvid'}

        ydl = YDL()
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
-        self.assertEqual(downloaded[u'format_id'], u'excellent')
+        self.assertEqual(downloaded['format_id'], 'excellent')

        ydl = YDL({'format_limit': 'good'})
        assert ydl.params['format_limit'] == 'good'
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
-        self.assertEqual(downloaded[u'format_id'], u'good')
+        self.assertEqual(downloaded['format_id'], 'good')

        ydl = YDL({'format_limit': 'great', 'format': 'all'})
        ydl.process_ie_result(info_dict.copy())
-        self.assertEqual(ydl.downloaded_info_dicts[0][u'format_id'], u'meh')
-        self.assertEqual(ydl.downloaded_info_dicts[1][u'format_id'], u'good')
-        self.assertEqual(ydl.downloaded_info_dicts[2][u'format_id'], u'great')
+        self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'meh')
+        self.assertEqual(ydl.downloaded_info_dicts[1]['format_id'], 'good')
+        self.assertEqual(ydl.downloaded_info_dicts[2]['format_id'], 'great')
        self.assertTrue('3' in ydl.msgs[0])

        ydl = YDL()
        ydl.params['format_limit'] = 'excellent'
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
-        self.assertEqual(downloaded[u'format_id'], u'excellent')
+        self.assertEqual(downloaded['format_id'], 'excellent')

    def test_format_selection(self):
        formats = [
-            {u'format_id': u'35', u'ext': u'mp4', 'preference': 1},
-            {u'format_id': u'45', u'ext': u'webm', 'preference': 2},
-            {u'format_id': u'47', u'ext': u'webm', 'preference': 3},
-            {u'format_id': u'2', u'ext': u'flv', 'preference': 4},
+            {'format_id': '35', 'ext': 'mp4', 'preference': 1},
+            {'format_id': '45', 'ext': 'webm', 'preference': 2},
+            {'format_id': '47', 'ext': 'webm', 'preference': 3},
+            {'format_id': '2', 'ext': 'flv', 'preference': 4},
        ]
-        info_dict = {u'formats': formats, u'extractor': u'test'}
+        info_dict = {'formats': formats, 'extractor': 'test'}

-        ydl = YDL({'format': u'20/47'})
+        ydl = YDL({'format': '20/47'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
-        self.assertEqual(downloaded['format_id'], u'47')
+        self.assertEqual(downloaded['format_id'], '47')

-        ydl = YDL({'format': u'20/71/worst'})
+        ydl = YDL({'format': '20/71/worst'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
-        self.assertEqual(downloaded['format_id'], u'35')
+        self.assertEqual(downloaded['format_id'], '35')

        ydl = YDL()
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
-        self.assertEqual(downloaded['format_id'], u'2')
+        self.assertEqual(downloaded['format_id'], '2')

-        ydl = YDL({'format': u'webm/mp4'})
+        ydl = YDL({'format': 'webm/mp4'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
-        self.assertEqual(downloaded['format_id'], u'47')
+        self.assertEqual(downloaded['format_id'], '47')

-        ydl = YDL({'format': u'3gp/40/mp4'})
+        ydl = YDL({'format': '3gp/40/mp4'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
-        self.assertEqual(downloaded['format_id'], u'35')
+        self.assertEqual(downloaded['format_id'], '35')
+
+    def test_format_selection_audio(self):
+        formats = [
+            {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none'},
+            {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none'},
+            {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none'},
+            {'format_id': 'vid', 'ext': 'mp4', 'preference': 4},
+        ]
+        info_dict = {'formats': formats, 'extractor': 'test'}
+
+        ydl = YDL({'format': 'bestaudio'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'audio-high')
+
+        ydl = YDL({'format': 'worstaudio'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'audio-low')
+
+        formats = [
+            {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1},
+            {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2},
+        ]
+        info_dict = {'formats': formats, 'extractor': 'test'}
+
+        ydl = YDL({'format': 'bestaudio/worstaudio/best'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'vid-high')

    def test_youtube_format_selection(self):
        order = [
@@ -200,17 +232,17 @@ class TestFormatSelection(unittest.TestCase):

    def test_prepare_filename(self):
        info = {
-            u'id': u'1234',
-            u'ext': u'mp4',
-            u'width': None,
+            'id': '1234',
+            'ext': 'mp4',
+            'width': None,
        }
        def fname(templ):
            ydl = YoutubeDL({'outtmpl': templ})
            return ydl.prepare_filename(info)
-        self.assertEqual(fname(u'%(id)s.%(ext)s'), u'1234.mp4')
-        self.assertEqual(fname(u'%(id)s-%(width)s.%(ext)s'), u'1234-NA.mp4')
+        self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
+        self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
        # Replace missing fields with 'NA'
-        self.assertEqual(fname(u'%(uploader_date)s-%(id)s.%(ext)s'), u'NA-1234.mp4')
+        self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')


 if __name__ == '__main__':
@@ -19,6 +19,7 @@ from youtube_dl.utils import (
    fix_xml_ampersands,
    get_meta_content,
    orderedSet,
+    PagedList,
    parse_duration,
    sanitize_filename,
    shell_quote,
@@ -214,5 +215,26 @@ class TestUtil(unittest.TestCase):
            fix_xml_ampersands('&#1234;&#x1abC;'), '&#1234;&#x1abC;')
        self.assertEqual(fix_xml_ampersands('&#&#'), '&amp;#&amp;#')

+    def test_paged_list(self):
+        def testPL(size, pagesize, sliceargs, expected):
+            def get_page(pagenum):
+                firstid = pagenum * pagesize
+                upto = min(size, pagenum * pagesize + pagesize)
+                for i in range(firstid, upto):
+                    yield i
+
+            pl = PagedList(get_page, pagesize)
+            got = pl.getslice(*sliceargs)
+            self.assertEqual(got, expected)
+
+        testPL(5, 2, (), [0, 1, 2, 3, 4])
+        testPL(5, 2, (1,), [1, 2, 3, 4])
+        testPL(5, 2, (2,), [2, 3, 4])
+        testPL(5, 2, (4,), [4])
+        testPL(5, 2, (0, 3), [0, 1, 2])
+        testPL(5, 2, (1, 4), [1, 2, 3])
+        testPL(5, 2, (2, 99), [2, 3, 4])
+        testPL(5, 2, (20, 99), [])
+
 if __name__ == '__main__':
    unittest.main()
@@ -39,6 +39,7 @@ from .utils import (
    locked_file,
    make_HTTPS_handler,
    MaxDownloadsReached,
+    PagedList,
    PostProcessingError,
    platform_name,
    preferredencoding,
@@ -578,19 +579,27 @@ class YoutubeDL(object):

            playlist_results = []

-            n_all_entries = len(ie_result['entries'])
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

-            entries = ie_result['entries'][playliststart:playlistend]
-            n_entries = len(entries)
-
-            self.to_screen(
-                "[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
-                (ie_result['extractor'], playlist, n_all_entries, n_entries))
+            if isinstance(ie_result['entries'], list):
+                n_all_entries = len(ie_result['entries'])
+                entries = ie_result['entries'][playliststart:playlistend]
+                n_entries = len(entries)
+                self.to_screen(
+                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
+                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
+            else:
+                assert isinstance(ie_result['entries'], PagedList)
+                entries = ie_result['entries'].getslice(
+                    playliststart, playlistend)
+                n_entries = len(entries)
+                self.to_screen(
+                    "[%s] playlist %s: Downloading %d videos" %
+                    (ie_result['extractor'], playlist, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
@@ -637,6 +646,18 @@ class YoutubeDL(object):
            return available_formats[-1]
        elif format_spec == 'worst':
            return available_formats[0]
+        elif format_spec == 'bestaudio':
+            audio_formats = [
+                f for f in available_formats
+                if f.get('vcodec') == 'none']
+            if audio_formats:
+                return audio_formats[-1]
+        elif format_spec == 'worstaudio':
+            audio_formats = [
+                f for f in available_formats
+                if f.get('vcodec') == 'none']
+            if audio_formats:
+                return audio_formats[0]
        else:
            extensions = ['mp4', 'flv', 'webm', '3gp']
            if format_spec in extensions:
@@ -701,7 +722,7 @@ class YoutubeDL(object):
            self.list_formats(info_dict)
            return

-        req_format = self.params.get('format', 'best')
+        req_format = self.params.get('format')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
@@ -1094,6 +1115,8 @@ class YoutubeDL(object):
                res += 'audio'
            if fdict.get('abr') is not None:
                res += '@%3dk' % fdict['abr']
+            if fdict.get('asr') is not None:
+                res += ' (%5dHz)' % fdict['asr']
            if fdict.get('filesize') is not None:
                if res:
                    res += ', '
@@ -256,8 +256,8 @@ def parseOpts(overrideArguments=None):


    video_format.add_option('-f', '--format',
-            action='store', dest='format', metavar='FORMAT', default='best',
-            help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
+            action='store', dest='format', metavar='FORMAT', default=None,
+            help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestaudio", "worst", and "worstaudio"')
    video_format.add_option('--all-formats',
            action='store_const', dest='format', help='download all available video formats', const='all')
    video_format.add_option('--prefer-free-formats',
@@ -624,6 +624,10 @@ def _real_main(argv=None):
    if opts.default_search not in ('auto', None) and ':' not in opts.default_search:
        parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')

+    # Do not download videos when there are audio-only formats
+    if opts.extractaudio and not opts.keepvideo and opts.format is None:
+        opts.format = 'bestaudio/best'
+
    # --all-sub automatically sets --write-sub if --write-auto-sub is not given
    # this was the old behaviour if only --all-sub was given.
    if opts.allsubtitles and (opts.writeautomaticsub == False):
@@ -63,6 +63,7 @@ class InfoExtractor(object):
                    * tbr        Average bitrate of audio and video in KBit/s
                    * abr        Average audio bitrate in KBit/s
                    * acodec     Name of the audio codec in use
+                    * asr        Audio sampling rate in Hertz
                    * vbr        Average video bitrate in KBit/s
                    * vcodec     Name of the video codec in use
                    * filesize   The number of bytes, if known in advance
@@ -20,6 +20,7 @@ class CSpanIE(InfoExtractor):
            'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
            'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in [Shelby County v. Holder] in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
        },
+        'skip': 'Regularly fails on travis, for unknown reasons',
    }

    def _real_extract(self, url):
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -9,12 +11,12 @@ from ..utils import (
 class XVideosIE(InfoExtractor):
    _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
    _TEST = {
-        u'url': u'http://www.xvideos.com/video939581/funny_porns_by_s_-1',
-        u'file': u'939581.flv',
-        u'md5': u'1d0c835822f0a71a7bf011855db929d0',
-        u'info_dict': {
-            u"title": u"Funny Porns By >>>>S<<<<<< -1",
-            u"age_limit": 18,
+        'url': 'http://www.xvideos.com/video939581/funny_porns_by_s_-1',
+        'file': '939581.flv',
+        'md5': '1d0c835822f0a71a7bf011855db929d0',
+        'info_dict': {
+            "title": "Funny Porns By >>>>S<<<<<< -1",
+            "age_limit": 18,
        }
    }

@@ -27,18 +29,18 @@ class XVideosIE(InfoExtractor):
        self.report_extraction(video_id)

        # Extract video URL
-        video_url = compat_urllib_parse.unquote(self._search_regex(r'flv_url=(.+?)&',
-            webpage, u'video URL'))
+        video_url = compat_urllib_parse.unquote(
+            self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL'))

        # Extract title
-        video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XVID',
-            webpage, u'title')
+        video_title = self._html_search_regex(
+            r'<title>(.*?)\s+-\s+XVID', webpage, 'title')

        # Extract video thumbnail
-        video_thumbnail = self._search_regex(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/([a-fA-F0-9.]+jpg)',
-            webpage, u'thumbnail', fatal=False)
+        video_thumbnail = self._search_regex(
+            r'url_bigthumb=(.+?)&amp', webpage, 'thumbnail', fatal=False)

-        info = {
+        return {
            'id': video_id,
            'url': video_url,
            'uploader': None,
@@ -49,5 +51,3 @@ class XVideosIE(InfoExtractor):
            'description': None,
            'age_limit': 18,
        }
-
-        return [info]
@@ -27,6 +27,8 @@ from ..utils import (
    get_element_by_id,
    get_element_by_attribute,
    ExtractorError,
+    int_or_none,
+    PagedList,
    RegexNotFoundError,
    unescapeHTML,
    unified_strdate,
@@ -270,6 +272,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                u"uploader_id": u"setindia"
            }
        },
+        {
+            u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I",
+            u"file": u"a9LDPn-MO4I.m4a",
+            u"note": u"256k DASH audio (format 141) via DASH manifest",
+            u"params": {
+                u"format": "141"
+            },
+            u"info_dict": {
+                u"upload_date": "20121002",
+                u"uploader_id": "8KVIDEO",
+                u"description": "No description available.",
+                u"uploader": "8KVIDEO",
+                u"title": "UHDTV TEST 8K VIDEO.mp4"
+            }
+        },
    ]


@@ -1067,18 +1084,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        video_id = mobj.group(2)
        return video_id

-    def _get_video_url_list(self, url_map):
-        """
-        Transform a dictionary in the format {itag:url} to a list of (itag, url)
-        with the requested formats.
-        """
-        existing_formats = [x for x in self._formats if x in url_map]
-        if len(existing_formats) == 0:
-            raise ExtractorError(u'no known formats available for video')
-        video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
-        video_url_list.reverse() # order worst to best
-        return video_url_list
-
    def _extract_from_m3u8(self, manifest_url, video_id):
        url_map = {}
        def _get_urls(_manifest):
@@ -1252,7 +1257,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                video_annotations = self._extract_annotations(video_id)

        # Decide which formats to download
-
        try:
            mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
            if not mobj:
@@ -1277,9 +1281,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        except ValueError:
            pass

+        def _map_to_format_list(urlmap):
+            formats = []
+            for itag, video_real_url in urlmap.items():
+                dct = {
+                    'format_id': itag,
+                    'url': video_real_url,
+                    'player_url': player_url,
+                }
+                dct.update(self._formats[itag])
+                formats.append(dct)
+            return formats
+
        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
-            video_url_list = [('_rtmp', video_info['conn'][0])]
+            formats = [{
+                'format_id': '_rtmp',
+                'protocol': 'rtmp',
+                'url': video_info['conn'][0],
+                'player_url': player_url,
+            }]
        elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
            if 'rtmpe%3Dyes' in encoded_url_map:
@@ -1324,23 +1345,49 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                    if 'ratebypass' not in url:
                        url += '&ratebypass=yes'
                    url_map[url_data['itag'][0]] = url
-            video_url_list = self._get_video_url_list(url_map)
+            formats = _map_to_format_list(url_map)
        elif video_info.get('hlsvp'):
            manifest_url = video_info['hlsvp'][0]
            url_map = self._extract_from_m3u8(manifest_url, video_id)
-            video_url_list = self._get_video_url_list(url_map)
+            formats = _map_to_format_list(url_map)
        else:
            raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

-        formats = []
-        for itag, video_real_url in video_url_list:
-            dct = {
-                'format_id': itag,
-                'url': video_real_url,
-                'player_url': player_url,
-            }
-            dct.update(self._formats[itag])
-            formats.append(dct)
+        # Look for the DASH manifest
+        dash_manifest_url_lst = video_info.get('dashmpd')
+        if dash_manifest_url_lst and dash_manifest_url_lst[0]:
+            try:
+                dash_doc = self._download_xml(
+                    dash_manifest_url_lst[0], video_id,
+                    note=u'Downloading DASH manifest',
+                    errnote=u'Could not download DASH manifest')
+                for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
+                    url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
+                    if url_el is None:
+                        continue
+                    format_id = r.attrib['id']
+                    video_url = url_el.text
+                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
+                    f = {
+                        'format_id': format_id,
+                        'url': video_url,
+                        'width': int_or_none(r.attrib.get('width')),
+                        'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
+                        'asr': int_or_none(r.attrib.get('audioSamplingRate')),
+                        'filesize': filesize,
+                    }
+                    try:
+                        existing_format = next(
+                            fo for fo in formats
+                            if fo['format_id'] == format_id)
+                    except StopIteration:
+                        f.update(self._formats.get(format_id, {}))
+                        formats.append(f)
+                    else:
+                        existing_format.update(f)
+
+            except (ExtractorError, KeyError) as e:
+                self.report_warning(u'Skipping DASH manifest: %s' % e, video_id)

        self._sort_formats(formats)

@@ -1580,44 +1627,35 @@ class YoutubeUserIE(InfoExtractor):
        # page by page until there are no video ids - it means we got
        # all of them.

-        url_results = []
-
-        for pagenum in itertools.count(0):
+        def download_page(pagenum):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
-            page = self._download_webpage(gdata_url, username,
-                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
+            page = self._download_webpage(
+                gdata_url, username,
+                u'Downloading video ids from %d to %d' % (
+                    start_index, start_index + self._GDATA_PAGE_SIZE))

            try:
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'entry' not in response['feed']:
-                # Number of videos is a multiple of self._MAX_RESULTS
-                break
+                return

            # Extract video identifiers
            entries = response['feed']['entry']
            for entry in entries:
                title = entry['title']['$t']
                video_id = entry['id']['$t'].split('/')[-1]
-                url_results.append({
+                yield {
                    '_type': 'url',
                    'url': video_id,
                    'ie_key': 'Youtube',
                    'id': 'video_id',
                    'title': title,
-                })
-
-            # A little optimization - if current page is not
-            # "full", ie. does not contain PAGE_SIZE video ids then
-            # we can assume that this page is the last one - there
-            # are no more ids on further pages - no need to query
-            # again.
-
-            if len(entries) < self._GDATA_PAGE_SIZE:
-                break
+                }
+        url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)

        return self.playlist_result(url_results, playlist_title=username)

@@ -6,6 +6,7 @@ import datetime
 import email.utils
 import errno
 import gzip
+import itertools
 import io
 import json
 import locale
@@ -1131,8 +1132,8 @@ class HEADRequest(compat_urllib_request.Request):
        return "HEAD"


-def int_or_none(v):
-    return v if v is None else int(v)
+def int_or_none(v, scale=1):
+    return v if v is None else (int(v) // scale)


 def parse_duration(s):
@@ -1164,3 +1165,46 @@ def check_executable(exe, args=[]):
    except OSError:
        return False
    return exe
+
+
+class PagedList(object):
+    def __init__(self, pagefunc, pagesize):
+        self._pagefunc = pagefunc
+        self._pagesize = pagesize
+
+    def getslice(self, start=0, end=None):
+        res = []
+        for pagenum in itertools.count(start // self._pagesize):
+            firstid = pagenum * self._pagesize
+            nextfirstid = pagenum * self._pagesize + self._pagesize
+            if start >= nextfirstid:
+                continue
+
+            page_results = list(self._pagefunc(pagenum))
+
+            startv = (
+                start % self._pagesize
+                if firstid <= start < nextfirstid
+                else 0)
+
+            endv = (
+                ((end - 1) % self._pagesize) + 1
+                if (end is not None and firstid <= end <= nextfirstid)
+                else None)
+
+            if startv != 0 or endv is not None:
+                page_results = page_results[startv:endv]
+            res.extend(page_results)
+
+            # A little optimization - if current page is not "full", ie. does
+            # not contain page_size videos then we can assume that this page
+            # is the last one - there are no more ids on further pages -
+            # i.e. no need to query again.
+            if len(page_results) + startv < self._pagesize:
+                break
+
+            # If we got the whole page, but the next page is not interesting,
+            # break out early as well
+            if end == nextfirstid:
+                break
+        return res
@@ -1,2 +1,2 @@

-__version__ = '2014.01.22.2'
+__version__ = '2014.01.22.4'
Author	SHA1	Message	Date
Philipp Hagemeister	c4cd138b92	release 2014.01.22.4	2014-01-22 21:01:52 +01:00
Philipp Hagemeister	65697b3bf3	Merge branch 'paged-lists' Conflicts: test/test_utils.py youtube_dl/extractor/youtube.py	2014-01-22 20:00:16 +01:00
Philipp Hagemeister	50317b111d	Merge branch 'youtube-dash-manifest' Conflicts: youtube_dl/extractor/youtube.py	2014-01-22 19:58:31 +01:00
Philipp Hagemeister	d7975ea287	[xvideos] Simplify	2014-01-22 19:02:48 +01:00
Mike Col	714d709a31	[xvideos] Fix thumbnail extraction Signed-off-by: Philipp Hagemeister <phihag@phihag.de>	2014-01-22 19:01:41 +01:00
Philipp Hagemeister	11577ec054	[cspan] Disable test. It works fine from all my machines, no matter where, but from travis we get lots of 403s. Maybe another project is scraping CSPAN from travis and they're blocking the travis machines?	2014-01-22 15:10:02 +01:00
Philipp Hagemeister	79bf58f9b5	Document -f worstaudio as well	2014-01-22 14:55:45 +01:00
Philipp Hagemeister	cd8a562267	release 2014.01.22.3	2014-01-22 14:53:36 +01:00
Philipp Hagemeister	de3ef3ed58	Default to -f bestaudio when only audio is requested	2014-01-22 14:53:23 +01:00
Philipp Hagemeister	8908741806	Use unicode_literals in test_YoutubeDL	2014-01-22 14:48:02 +01:00
Philipp Hagemeister	ba7678f9cc	Add -f bestaudio (Fixes #2163 )	2014-01-22 14:47:29 +01:00
Philipp Hagemeister	b7ab059084	Add infrastructure for paged lists. This commit makes it possible to download playlist pages as needed instead of all at once. Before this commit, youtube-dl http://www.youtube.com/user/ANNnewsCH/videos --playlist-end 2 --skip-download took quite some time - now it's almost instantaneous. As an example, the youtube:user extractor has been converted. Fixes #2175	2014-01-20 11:36:47 +01:00
Philipp Hagemeister	dd27fd1739	[youtube] Download DASH manifest. If given, download and parse the DASH manifest file, in order to get ultra-HQ formats. Fixes #2166	2014-01-19 05:47:20 +01:00