Merge branch 'master' of github.com:rg3/youtube-dl

2026-05-26 07:15:19 +00:00 · 2013-12-05 14:29:14 +01:00
parent 7fc3fa0545 673d1273ff
commit ef2fac6f4a
6 changed files with 98 additions and 6 deletions
@@ -33,6 +33,7 @@ TEST_ID = 'BaW_jenozKc'
 INFO_JSON_FILE = TEST_ID + '.info.json'
 DESCRIPTION_FILE = TEST_ID + '.mp4.description'
 EXPECTED_DESCRIPTION = u'''test chars:  "'/\ä↭𝕐
 test URL: https://github.com/rg3/youtube-dl/issues/1892
 This is a test video for youtube-dl.
@@ -145,6 +145,7 @@ from .teamcoco import TeamcocoIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
 from .tf1 import TF1IE
 from .theplatform import ThePlatformIE
 from .thisav import ThisAVIE
 from .toutv import TouTvIE
 from .traileraddict import TrailerAddictIE
@@ -11,7 +11,7 @@ class HotNewHipHopIE(InfoExtractor):
        u'file': u'1435540.mp3',
        u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
        u'info_dict': {
-            u"title": u"Freddie Gibbs - Lay It Down"
+            u"title": u'Freddie Gibbs "Lay It Down"'
        }
    }
@@ -69,6 +69,21 @@ class MetacafeIE(InfoExtractor):
            u'age_limit': 18,
        },
    },
    # cbs video
    {
        u'url': u'http://www.metacafe.com/watch/cb-0rOxMBabDXN6/samsung_galaxy_note_2_samsungs_next_generation_phablet/',
        u'info_dict': {
            u'id': u'0rOxMBabDXN6',
            u'ext': u'flv',
            u'title': u'Samsung Galaxy Note 2: Samsung\'s next-generation phablet',
            u'description': u'md5:54d49fac53d26d5a0aaeccd061ada09d',
            u'duration': 129,
        },
        u'params': {
            # rtmp download
            u'skip_download': True,
        },
    },
    ]
@@ -106,10 +121,16 @@ class MetacafeIE(InfoExtractor):
        video_id = mobj.group(1)
-        # Check if video comes from YouTube
+        # the video may come from an external site
-        mobj2 = re.match(r'^yt-(.*)$', video_id)
+        m_external = re.match('^(\w{2})-(.*)$', video_id)
-        if mobj2 is not None:
+        if m_external is not None:
-            return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1), 'Youtube')]
+            prefix, ext_id = m_external.groups()
            # Check if video comes from YouTube
            if prefix == 'yt':
                return self.url_result('http://www.youtube.com/watch?v=%s' % ext_id, 'Youtube')
            # CBS videos use theplatform.com
            if prefix == 'cb':
                return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
        # Retrieve video webpage to extract further information
        req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id)
@@ -0,0 +1,69 @@
 import re
 import json
 from .common import InfoExtractor
 from ..utils import (
    xpath_with_ns,
    find_xpath_attr,
 )
 def _x(p):
    """Return the XPath ``p`` processed by ``xpath_with_ns`` for SMIL lookups.

    ``p`` is handed to ``xpath_with_ns`` together with a mapping that binds
    the ``smil`` prefix to the SMIL 2.1 Language namespace URI, so callers
    can write expressions such as ``'smil:head'``.
    """
    return xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
 class ThePlatformIE(InfoExtractor):
    """Information extractor for videos served through link.theplatform.com.

    Matches either a full ``link.theplatform.com/s/<account>/<id>`` URL or
    the internal ``theplatform:<id>`` form.
    """

    _VALID_URL = r'(?:https?://link\.theplatform\.com/s/[^/]+/|theplatform:)(?P<id>[^/\?]+)'
    _TEST = {
        # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
        u'url': u'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
        u'info_dict': {
            u'id': u'e9I_cZgTgIPd',
            u'ext': u'flv',
            u'title': u'Blackberry\'s big, bold Z30',
            u'description': u'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
            u'duration': 247,
        },
        u'params': {
            # rtmp download
            u'skip_download': True,
        },
    }

    def _get_info(self, video_id):
        """Build the info dictionary for ``video_id``.

        Two resources are fetched, in this order: the SMIL manifest (which
        lists the available video renditions) and the JSON "preview"
        document (title, description, thumbnail and duration).

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``description``, ``thumbnail`` and ``duration``. ``formats`` is
        ordered ascending by (height, width, vbr).
        """
        # NOTE(review): the account path segment is fixed to 'dJ5BDC' here,
        # whatever account appeared in the matched URL.
        smil_url = 'http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?format=smil&mbr=true'.format(video_id)
        smil_doc = self._download_xml(smil_url, video_id)

        preview_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
        metadata = json.loads(self._download_webpage(preview_url, video_id))

        # The <meta> element in the SMIL head carries the base URL that is
        # shared by every rendition.
        stream_base = smil_doc.find(_x('smil:head')).find(_x('smil:meta')).attrib['base']
        switch_node = smil_doc.find(_x('smil:body')).find(_x('smil:switch'))

        def describe(video_node):
            # Translate one <video> element into a format dictionary.
            props = video_node.attrib
            return {
                'url': stream_base,
                'play_path': 'mp4:' + props['src'],
                'ext': 'flv',
                'width': int(props['width']),
                'height': int(props['height']),
                'vbr': int(props['system-bitrate']),
            }

        formats = sorted(
            [describe(node) for node in switch_node.findall(_x('smil:video'))],
            key=lambda fmt: (fmt['height'], fmt['width'], fmt['vbr']))

        return {
            'id': video_id,
            'title': metadata['title'],
            'formats': formats,
            'description': metadata['description'],
            'thumbnail': metadata['defaultThumbnailUrl'],
            # presumably milliseconds converted to whole seconds; confirm
            # against the preview API
            'duration': metadata['duration']//1000,
        }

    def _real_extract(self, url):
        """Pull the video id out of ``url`` and delegate to ``_get_info``."""
        video_id = re.match(self._VALID_URL, url).group('id')
        return self._get_info(video_id)
@@ -15,7 +15,7 @@ class VevoIE(InfoExtractor):
    Accepts urls from vevo.com or in the format 'vevo:{id}'
    (currently used by MTVIE)
    """
-    _VALID_URL = r'((http://www\.vevo\.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
+    _VALID_URL = r'((http://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?)|(vevo:))(?P<id>.*?)(\?|$)'
    _TESTS = [{
        u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
        u'file': u'GB1101300280.mp4',