release 2016.03.27

[extractor/common] Filter out unsupported encrypted media for f4m formats (Closes #8573)
[downloader/f4m] Extract routine for removing unsupported encrypted media
2026-05-10 23:53:44 +00:00 · 2016-03-27 16:56:33 +02:00 · 2016-03-27 07:42:38 +06:00 · 2016-03-27 07:41:19 +06:00 · 2016-03-27 07:03:08 +06:00 · 2016-03-27 04:36:02 +08:00
41 changed files with 223 additions and 78 deletions
@@ -39,6 +39,8 @@ from .compat import (
    compat_urllib_request_DataHandler,
 )
 from .utils import (
+    age_restricted,
+    args_to_str,
    ContentTooShortError,
    date_from_str,
    DateRange,
@@ -58,13 +60,16 @@ from .utils import (
    PagedList,
    parse_filesize,
    PerRequestProxyHandler,
-    PostProcessingError,
    platform_name,
+    PostProcessingError,
    preferredencoding,
+    prepend_extension,
    render_table,
+    replace_extension,
    SameFileError,
    sanitize_filename,
    sanitize_path,
+    sanitize_url,
    sanitized_Request,
    std_headers,
    subtitles_filename,
@@ -75,10 +80,6 @@ from .utils import (
    write_string,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
-    prepend_extension,
-    replace_extension,
-    args_to_str,
-    age_restricted,
 )
 from .cache import Cache
 from .extractor import get_info_extractor, gen_extractors
@@ -1229,6 +1230,7 @@ class YoutubeDL(object):
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
+                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
@@ -1263,6 +1265,7 @@ class YoutubeDL(object):
        if subtitles:
            for _, subtitle in subtitles.items():
                for subtitle_format in subtitle:
+                    subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if 'ext' not in subtitle_format:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

@@ -1292,6 +1295,8 @@ class YoutubeDL(object):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

+            format['url'] = sanitize_url(format['url'])
+
            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            else:
@@ -223,6 +223,12 @@ def write_metadata_tag(stream, metadata):
        write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))


+def remove_encrypted_media(media):
+    return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
+                                 'drmAdditionalHeaderSetId' not in e.attrib,
+                       media))
+
+
 def _add_ns(prop):
    return '{http://ns.adobe.com/f4m/1.0}%s' % prop

@@ -244,9 +250,7 @@ class F4mFD(FragmentFD):
            # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
            if 'id' not in e.attrib:
                self.report_error('Missing ID in f4m DRM')
-        media = list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
-                                      'drmAdditionalHeaderSetId' not in e.attrib,
-                            media))
+        media = remove_encrypted_media(media)
        if not media:
            self.report_error('Unsupported DRM')
        return media
@@ -44,6 +44,7 @@ class Abc7NewsIE(InfoExtractor):
            'contentURL', webpage, 'm3u8 url', fatal=True)

        formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
+        self._sort_formats(formats)

        title = self._og_search_title(webpage).strip()
        description = self._og_search_description(webpage).strip()
@@ -120,6 +120,7 @@ class AzubuLiveIE(InfoExtractor):
        bc_info = self._download_json(req, user)
        m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS')
        formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4')
+        self._sort_formats(formats)

        return {
            'id': info['id'],
@@ -94,6 +94,7 @@ class BetIE(InfoExtractor):
            xpath_with_ns('./media:thumbnail', NS_MAP)).get('url')

        formats = self._extract_smil_formats(smil_url, display_id)
+        self._sort_formats(formats)

        return {
            'id': video_id,
@@ -136,13 +136,16 @@ class BrightcoveLegacyIE(InfoExtractor):
        else:
            flashvars = {}

+        data_url = object_doc.attrib.get('data', '')
+        data_url_params = compat_parse_qs(compat_urllib_parse_urlparse(data_url).query)
+
        def find_param(name):
            if name in flashvars:
                return flashvars[name]
            node = find_xpath_attr(object_doc, './param', 'name', name)
            if node is not None:
                return node.attrib['value']
-            return None
+            return data_url_params.get(name)

        params = {}

@@ -294,7 +297,7 @@ class BrightcoveLegacyIE(InfoExtractor):
            'uploader': video_info.get('publisherName'),
        }

-        renditions = video_info.get('renditions')
+        renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', [])
        if renditions:
            formats = []
            for rend in renditions:
@@ -316,13 +319,23 @@ class BrightcoveLegacyIE(InfoExtractor):
                if ext is None:
                    ext = determine_ext(url)
                size = rend.get('size')
-                formats.append({
+                a_format = {
                    'url': url,
                    'ext': ext,
                    'height': rend.get('frameHeight'),
                    'width': rend.get('frameWidth'),
                    'filesize': size if size != 0 else None,
-                })
+                }
+
+                # m3u8 manifests with remote == false are media playlists
+                # Not calling _extract_m3u8_formats here to save network traffic
+                if ext == 'm3u8':
+                    a_format.update({
+                        'ext': 'mp4',
+                        'protocol': 'm3u8',
+                    })
+
+                formats.append(a_format)
            self._sort_formats(formats)
            info['formats'] = formats
        elif video_info.get('FLVFullLengthURL') is not None:
@@ -122,6 +122,7 @@ class CBSNewsLiveVideoIE(InfoExtractor):
            for entry in f4m_formats:
                # URLs without the extra param induce an 404 error
                entry.update({'extra_param_to_segment_url': hdcore_sign})
+        self._sort_formats(f4m_formats)

        return {
            'id': video_id,
@@ -48,6 +48,7 @@ class ChaturbateIE(InfoExtractor):
            raise ExtractorError('Unable to find stream URL')

        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+        self._sort_formats(formats)

        return {
            'id': video_id,
@@ -24,6 +24,7 @@ from ..compat import (
    compat_urllib_parse_urlencode,
    compat_urlparse,
 )
+from ..downloader.f4m import remove_encrypted_media
 from ..utils import (
    NO_DEFAULT,
    age_restricted,
@@ -989,6 +990,11 @@ class InfoExtractor(object):
        if not media_nodes:
            manifest_version = '2.0'
            media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
+        # Remove unsupported DRM protected media from final formats
+        # rendition (see https://github.com/rg3/youtube-dl/issues/8573).
+        media_nodes = remove_encrypted_media(media_nodes)
+        if not media_nodes:
+            return formats
        base_url = xpath_text(
            manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
            'base URL', default=None)
@@ -1021,8 +1027,6 @@ class InfoExtractor(object):
                'height': int_or_none(media_el.attrib.get('height')),
                'preference': preference,
            })
-        self._sort_formats(formats)
-
        return formats

    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
@@ -1143,7 +1147,6 @@ class InfoExtractor(object):
                    last_media = None
                formats.append(f)
                last_info = {}
-        self._sort_formats(formats)
        return formats

    @staticmethod
@@ -1317,8 +1320,6 @@ class InfoExtractor(object):
                })
                continue

-        self._sort_formats(formats)
-
        return formats

    def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
@@ -1536,7 +1537,6 @@ class InfoExtractor(object):
                            existing_format.update(f)
                    else:
                        self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
-        self._sort_formats(formats)
        return formats

    def _live_title(self, name):
@@ -57,6 +57,7 @@ class CWTVIE(InfoExtractor):

        formats = self._extract_m3u8_formats(
            video_data['videos']['variantplaylist']['uri'], video_id, 'mp4')
+        self._sort_formats(formats)

        thumbnails = [{
            'url': image['uri'],
@@ -38,6 +38,7 @@ class DFBIE(InfoExtractor):
        token_el = f4m_info.find('token')
        manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
        formats = self._extract_f4m_formats(manifest_url, display_id)
+        self._sort_formats(formats)

        return {
            'id': video_id,
@@ -63,18 +63,23 @@ class DiscoveryIE(InfoExtractor):

        video_title = info.get('playlist_title') or info.get('video_title')

-        entries = [{
-            'id': compat_str(video_info['id']),
-            'formats': self._extract_m3u8_formats(
+        entries = []
+
+        for idx, video_info in enumerate(info['playlist']):
+            formats = self._extract_m3u8_formats(
                video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls',
-                note='Download m3u8 information for video %d' % (idx + 1)),
-            'title': video_info['title'],
-            'description': video_info.get('description'),
-            'duration': parse_duration(video_info.get('video_length')),
-            'webpage_url': video_info.get('href') or video_info.get('url'),
-            'thumbnail': video_info.get('thumbnailURL'),
-            'alt_title': video_info.get('secondary_title'),
-            'timestamp': parse_iso8601(video_info.get('publishedDate')),
-        } for idx, video_info in enumerate(info['playlist'])]
+                note='Download m3u8 information for video %d' % (idx + 1))
+            self._sort_formats(formats)
+            entries.append({
+                'id': compat_str(video_info['id']),
+                'formats': formats,
+                'title': video_info['title'],
+                'description': video_info.get('description'),
+                'duration': parse_duration(video_info.get('video_length')),
+                'webpage_url': video_info.get('href') or video_info.get('url'),
+                'thumbnail': video_info.get('thumbnailURL'),
+                'alt_title': video_info.get('secondary_title'),
+                'timestamp': parse_iso8601(video_info.get('publishedDate')),
+            })

        return self.playlist_result(entries, display_id, video_title)
@@ -118,6 +118,8 @@ class DPlayIE(InfoExtractor):
                if info.get(protocol):
                    extract_formats(protocol, info[protocol])

+        self._sort_formats(formats)
+
        return {
            'id': video_id,
            'display_id': display_id,
@@ -39,13 +39,13 @@ class DWIE(InfoExtractor):
        hidden_inputs = self._hidden_inputs(webpage)
        title = hidden_inputs['media_title']

-        formats = []
        if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
            formats = self._extract_smil_formats(
                'http://www.dw.com/smil/v-%s' % media_id, media_id,
                transform_source=lambda s: s.replace(
                    'rtmp://tv-od.dw.de/flash/',
                    'http://tv-download.dw.de/dwtv_video/flv/'))
+            self._sort_formats(formats)
        else:
            formats = [{'url': hidden_inputs['file_name']}]

@@ -1124,7 +1124,23 @@ class GenericIE(InfoExtractor):
                # m3u8 downloads
                'skip_download': True,
            }
-        }
+        },
+        # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
+        # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
+        {
+            'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
+            'info_dict': {
+                'id': '4785848093001',
+                'ext': 'mp4',
+                'title': 'The Cardinal Pell Interview',
+                'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
+                'uploader': 'GlobeCast Australia - GlobeStream',
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            },
+        },
    ]

    def report_following_redirect(self, new_url):
@@ -1294,6 +1310,7 @@ class GenericIE(InfoExtractor):
                    'vcodec': 'none' if m.group('type') == 'audio' else None
                }]
                info_dict['direct'] = True
+            self._sort_formats(formats)
            info_dict['formats'] = formats
            return info_dict

@@ -1320,6 +1337,7 @@ class GenericIE(InfoExtractor):
        # Is it an M3U playlist?
        if first_bytes.startswith(b'#EXTM3U'):
            info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
+            self._sort_formats(info_dict['formats'])
            return info_dict

        # Maybe it's a direct link to a video?
@@ -1344,15 +1362,19 @@ class GenericIE(InfoExtractor):
            if doc.tag == 'rss':
                return self._extract_rss(url, video_id, doc)
            elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
-                return self._parse_smil(doc, url, video_id)
+                smil = self._parse_smil(doc, url, video_id)
+                self._sort_formats(smil['formats'])
+                return smil
            elif doc.tag == '{http://xspf.org/ns/0/}playlist':
                return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
            elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
                info_dict['formats'] = self._parse_mpd_formats(
                    doc, video_id, mpd_base_url=url.rpartition('/')[0])
+                self._sort_formats(info_dict['formats'])
                return info_dict
            elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
                info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
+                self._sort_formats(info_dict['formats'])
                return info_dict
        except compat_xml_parse_error:
            pass
@@ -2037,6 +2059,9 @@ class GenericIE(InfoExtractor):
            else:
                entry_info_dict['url'] = video_url

+            if entry_info_dict.get('formats'):
+                self._sort_formats(entry_info_dict['formats'])
+
            entries.append(entry_info_dict)

        if len(entries) == 1:
@@ -130,6 +130,7 @@ class Laola1TvIE(InfoExtractor):
        formats = self._extract_f4m_formats(
            '%s?hdnea=%s&hdcore=3.2.0' % (token_attrib['url'], token_auth),
            video_id, f4m_id='hds')
+        self._sort_formats(formats)

        categories_str = _v('meta_sports')
        categories = categories_str.split(',') if categories_str else []
@@ -37,6 +37,7 @@ class LRTIE(InfoExtractor):
            r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*location\.hash\.substring\(1\)',
            webpage, 'm3u8 url', group='url')
        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)
@@ -219,7 +219,7 @@ class LyndaCourseIE(LyndaBaseIE):
                'Course %s does not exist' % course_id, expected=True)

        unaccessible_videos = 0
-        videos = []
+        entries = []

        # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
        # by single video API anymore
@@ -229,20 +229,22 @@ class LyndaCourseIE(LyndaBaseIE):
                if video.get('HasAccess') is False:
                    unaccessible_videos += 1
                    continue
-                if video.get('ID'):
-                    videos.append(video['ID'])
+                video_id = video.get('ID')
+                if video_id:
+                    entries.append({
+                        '_type': 'url_transparent',
+                        'url': 'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
+                        'ie_key': LyndaIE.ie_key(),
+                        'chapter': chapter.get('Title'),
+                        'chapter_number': int_or_none(chapter.get('ChapterIndex')),
+                        'chapter_id': compat_str(chapter.get('ID')),
+                    })

        if unaccessible_videos > 0:
            self._downloader.report_warning(
                '%s videos are only available for members (or paid members) and will not be downloaded. '
                % unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)

-        entries = [
-            self.url_result(
-                'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
-                'Lynda')
-            for video_id in videos]
-
        course_title = course.get('Title')

        return self.playlist_result(entries, course_id, course_title)
@@ -13,7 +13,7 @@ from ..utils import (
 class MailRuIE(InfoExtractor):
    IE_NAME = 'mailru'
    IE_DESC = 'Видео@Mail.Ru'
-    _VALID_URL = r'https?://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'

    _TESTS = [
        {
@@ -61,6 +61,10 @@ class MailRuIE(InfoExtractor):
                'duration': 6001,
            },
            'skip': 'Not accessible from Travis CI server',
+        },
+        {
+            'url': 'http://m.my.mail.ru/mail/3sktvtr/video/_myvideo/138.html',
+            'only_matching': True,
        }
    ]

@@ -47,6 +47,7 @@ class MatchTVIE(InfoExtractor):
        video_url = self._download_json(request, video_id)['data']['videoUrl']
        f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
        formats = self._extract_f4m_formats(f4m_url, video_id)
+        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': self._live_title('Матч ТВ - Прямой эфир'),
@@ -67,6 +67,7 @@ class MiTeleIE(InfoExtractor):
            formats.extend(self._extract_f4m_formats(
                file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
                display_id, f4m_id=loc))
+        self._sort_formats(formats)

        title = self._search_regex(
            r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title')
@@ -63,6 +63,7 @@ class NRKIE(InfoExtractor):
        if determine_ext(media_url) == 'f4m':
            formats = self._extract_f4m_formats(
                media_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id, f4m_id='hds')
+            self._sort_formats(formats)
        else:
            formats = [{
                'url': media_url,
@@ -279,13 +279,18 @@ class PluralsightCourseIE(PluralsightBaseIE):
            course_id, 'Downloading course data JSON')

        entries = []
-        for module in course_data:
+        for num, module in enumerate(course_data, 1):
            for clip in module.get('clips', []):
                player_parameters = clip.get('playerParameters')
                if not player_parameters:
                    continue
-                entries.append(self.url_result(
-                    '%s/training/player?%s' % (self._API_BASE, player_parameters),
-                    'Pluralsight'))
+                entries.append({
+                    '_type': 'url_transparent',
+                    'url': '%s/training/player?%s' % (self._API_BASE, player_parameters),
+                    'ie_key': PluralsightIE.ie_key(),
+                    'chapter': module.get('title'),
+                    'chapter_number': num,
+                    'chapter_id': module.get('moduleRef'),
+                })

        return self.playlist_result(entries, course_id, title, description)
@@ -31,6 +31,7 @@ class RestudyIE(InfoExtractor):
        formats = self._extract_smil_formats(
            'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' % video_id,
            video_id)
+        self._sort_formats(formats)

        return {
            'id': video_id,
@@ -49,6 +49,7 @@ class RteIE(InfoExtractor):
        # f4m_url = server + relative_url
        f4m_url = json_string['shows'][0]['media:group'][0]['rte:server'] + json_string['shows'][0]['media:group'][0]['url']
        f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
+        self._sort_formats(f4m_formats)

        return {
            'id': video_id,
@@ -209,6 +209,7 @@ class RTVELiveIE(InfoExtractor):
        png = self._download_webpage(png_url, video_id, 'Downloading url information')
        m3u8_url = _decrypt_url(png)
        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+        self._sort_formats(formats)

        return {
            'id': video_id,
@@ -38,6 +38,7 @@ class RTVNHIE(InfoExtractor):
                    item['file'], video_id, ext='mp4', entry_protocol='m3u8_native'))
            elif item.get('type') == '':
                formats.append({'url': item['file']})
+        self._sort_formats(formats)

        return {
            'id': video_id,
@@ -77,6 +77,7 @@ class ShahidIE(InfoExtractor):
            raise ExtractorError('This video is DRM protected.', expected=True)

        formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
+        self._sort_formats(formats)

        video = self._download_json(
            '%s/%s/%s?%s' % (
@@ -99,6 +99,7 @@ class SportBoxEmbedIE(InfoExtractor):
            webpage, 'hls file')

        formats = self._extract_m3u8_formats(hls, video_id, 'mp4')
+        self._sort_formats(formats)

        title = self._search_regex(
            r'sportboxPlayer\.node_title\s*=\s*"([^"]+)"', webpage, 'title')
@@ -82,6 +82,7 @@ class TelecincoIE(InfoExtractor):
        )
        formats = self._extract_m3u8_formats(
            token_info['tokenizedUrl'], episode, ext='mp4', entry_protocol='m3u8_native')
+        self._sort_formats(formats)

        return {
            'id': embed_data['videoId'],
@@ -69,6 +69,7 @@ class TubiTvIE(InfoExtractor):
        apu = self._search_regex(r"apu='([^']+)'", webpage, 'apu')
        m3u8_url = codecs.decode(apu, 'rot_13')[::-1]
        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+        self._sort_formats(formats)

        return {
            'id': video_id,
@@ -102,6 +102,9 @@ class TwitterCardIE(TwitterBaseIE):
            r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'),
            video_id)

+        if config.get('source_type') == 'vine':
+            return self.url_result(config['player_url'], 'Vine')
+
        def _search_dimensions_in_video_url(a_format, video_url):
            m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
            if m:
@@ -110,10 +113,9 @@ class TwitterCardIE(TwitterBaseIE):
                    'height': int(m.group('height')),
                })

-        playlist = config.get('playlist')
-        if playlist:
-            video_url = playlist[0]['source']
+        video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source')

+        if video_url:
            f = {
                'url': video_url,
            }
@@ -185,7 +187,6 @@ class TwitterIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
            'thumbnail': 're:^https?://.*\.jpg',
-            'duration': 12.922,
            'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"',
            'uploader': 'FREE THE NIPPLE',
            'uploader_id': 'freethenipple',
@@ -247,6 +248,18 @@ class TwitterIE(InfoExtractor):
        'params': {
            'skip_download': True,  # requires ffmpeg
        },
+    }, {
+        'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
+        'md5': '89a15ed345d13b86e9a5a5e051fa308a',
+        'info_dict': {
+            'id': 'MIOxnrUteUd',
+            'ext': 'mp4',
+            'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
+            'uploader': 'TAKUMA',
+            'uploader_id': '1004126642786242560',
+            'upload_date': '20140615',
+        },
+        'add_ie': ['Vine'],
    }]

    def _real_extract(self, url):
@@ -1,5 +1,7 @@
 from __future__ import unicode_literals

+import re
+
 from .common import InfoExtractor
 from ..compat import (
    compat_HTTPError,
@@ -8,6 +10,8 @@ from ..compat import (
    compat_urlparse,
 )
 from ..utils import (
+    determine_ext,
+    extract_attributes,
    ExtractorError,
    float_or_none,
    int_or_none,
@@ -51,21 +55,26 @@ class UdemyIE(InfoExtractor):
    }]

    def _enroll_course(self, base_url, webpage, course_id):
+        def combine_url(base_url, url):
+            return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url
+
        checkout_url = unescapeHTML(self._search_regex(
-            r'href=(["\'])(?P<url>https?://(?:www\.)?udemy\.com/payment/checkout/.+?)\1',
+            r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/payment/checkout/.+?)\1',
            webpage, 'checkout url', group='url', default=None))
        if checkout_url:
            raise ExtractorError(
                'Course %s is not free. You have to pay for it before you can download. '
-                'Use this URL to confirm purchase: %s' % (course_id, checkout_url), expected=True)
+                'Use this URL to confirm purchase: %s'
+                % (course_id, combine_url(base_url, checkout_url)),
+                expected=True)

        enroll_url = unescapeHTML(self._search_regex(
            r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/course/subscribe/.+?)\1',
            webpage, 'enroll url', group='url', default=None))
        if enroll_url:
-            if not enroll_url.startswith('http'):
-                enroll_url = compat_urlparse.urljoin(base_url, enroll_url)
-            webpage = self._download_webpage(enroll_url, course_id, 'Enrolling in the course')
+            webpage = self._download_webpage(
+                combine_url(base_url, enroll_url),
+                course_id, 'Enrolling in the course')
            if '>You have enrolled in' in webpage:
                self.to_screen('%s: Successfully enrolled in the course' % course_id)

@@ -73,11 +82,8 @@ class UdemyIE(InfoExtractor):
        return self._download_json(
            'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?%s' % (
                course_id, lecture_id, compat_urllib_parse_urlencode({
-                    'video_only': '',
-                    'auto_play': '',
-                    'fields[lecture]': 'title,description,asset',
+                    'fields[lecture]': 'title,description,view_html,asset',
                    'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,data',
-                    'instructorPreviewMode': 'False',
                })),
            lecture_id, 'Downloading lecture JSON')

@@ -200,7 +206,7 @@ class UdemyIE(InfoExtractor):
        def extract_output_format(src):
            return {
                'url': src['url'],
-                'format_id': '%sp' % (src.get('label') or format_id),
+                'format_id': '%sp' % (src.get('height') or format_id),
                'width': int_or_none(src.get('width')),
                'height': int_or_none(src.get('height')),
                'vbr': int_or_none(src.get('video_bitrate_in_kbps')),
@@ -217,9 +223,13 @@ class UdemyIE(InfoExtractor):
        if not isinstance(outputs, dict):
            outputs = {}

-        for format_id, output in outputs.items():
-            if isinstance(output, dict) and output.get('url'):
-                formats.append(extract_output_format(output))
+        def add_output_format_meta(f, key):
+            output = outputs.get(key)
+            if isinstance(output, dict):
+                output_format = extract_output_format(output)
+                output_format.update(f)
+                return output_format
+            return f

        download_urls = asset.get('download_urls')
        if isinstance(download_urls, dict):
@@ -232,21 +242,48 @@ class UdemyIE(InfoExtractor):
                    format_id = format_.get('label')
                    f = {
                        'url': format_['file'],
+                        'format_id': '%sp' % format_id,
                        'height': int_or_none(format_id),
                    }
                    if format_id:
                        # Some videos contain additional metadata (e.g.
                        # https://www.udemy.com/ios9-swift/learn/#/lecture/3383208)
-                        output = outputs.get(format_id)
-                        if isinstance(output, dict):
-                            output_format = extract_output_format(output)
-                            output_format.update(f)
-                            f = output_format
-                        else:
-                            f['format_id'] = '%sp' % format_id
+                        f = add_output_format_meta(f, format_id)
                    formats.append(f)

-        self._sort_formats(formats)
+        view_html = lecture.get('view_html')
+        if view_html:
+            view_html_urls = set()
+            for source in re.findall(r'<source[^>]+>', view_html):
+                attributes = extract_attributes(source)
+                src = attributes.get('src')
+                if not src:
+                    continue
+                res = attributes.get('data-res')
+                height = int_or_none(res)
+                if src in view_html_urls:
+                    continue
+                view_html_urls.add(src)
+                if attributes.get('type') == 'application/x-mpegURL' or determine_ext(src) == 'm3u8':
+                    m3u8_formats = self._extract_m3u8_formats(
+                        src, video_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id='hls', fatal=False)
+                    for f in m3u8_formats:
+                        m = re.search(r'/hls_(?P<height>\d{3,4})_(?P<tbr>\d{2,})/', f['url'])
+                        if m:
+                            if not f.get('height'):
+                                f['height'] = int(m.group('height'))
+                            if not f.get('tbr'):
+                                f['tbr'] = int(m.group('tbr'))
+                    formats.extend(m3u8_formats)
+                else:
+                    formats.append(add_output_format_meta({
+                        'url': src,
+                        'format_id': '%dp' % height if height else None,
+                        'height': height,
+                    }, res))
+
+        self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))

        return {
            'id': video_id,
@@ -152,7 +152,7 @@ class VevoIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)

-        json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
+        json_url = 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
        response = self._download_json(
            json_url, video_id, 'Downloading video info', 'Unable to download info')
        video_info = response.get('video') or {}
@@ -111,6 +111,7 @@ class VideomoreIE(InfoExtractor):

        video_url = xpath_text(video, './/video_url', 'video url', fatal=True)
        formats = self._extract_f4m_formats(video_url, video_id, f4m_id='hds')
+        self._sort_formats(formats)

        data = self._download_json(
            'http://videomore.ru/video/tracks/%s.json' % video_id,
@@ -50,6 +50,7 @@ class VierIE(InfoExtractor):

        playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)
        formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')
+        self._sort_formats(formats)

        title = self._og_search_title(webpage, default=display_id)
        description = self._og_search_description(webpage, default=None)
@@ -151,6 +151,7 @@ class ViideaIE(InfoExtractor):
                smil_url = '%s/%s/video/%s/smil.xml' % (base_url, lecture_slug, part_id)
                smil = self._download_smil(smil_url, lecture_id)
                info = self._parse_smil(smil, smil_url, lecture_id)
+                self._sort_formats(info['formats'])
                info['id'] = lecture_id if not multipart else '%s_part%s' % (lecture_id, part_id)
                info['display_id'] = lecture_slug if not multipart else '%s_part%s' % (lecture_slug, part_id)
                if multipart:
@@ -41,10 +41,12 @@ class YnetIE(InfoExtractor):
        m = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title)
        if m:
            title = m.group('title')
+        formats = self._extract_f4m_formats(f4m_url, video_id)
+        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
-            'formats': self._extract_f4m_formats(f4m_url, video_id),
+            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
        }
@@ -234,7 +234,9 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):

 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
-        for playlist_id in orderedSet(re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)):
+        for playlist_id in orderedSet(re.findall(
+                r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
+                content)):
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')

@@ -417,9 +417,12 @@ def sanitize_path(s):

 # Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of
 # unwanted failures due to missing protocol
+def sanitize_url(url):
+    return 'http:%s' % url if url.startswith('//') else url
+
+
 def sanitized_Request(url, *args, **kwargs):
-    return compat_urllib_request.Request(
-        'http:%s' % url if url.startswith('//') else url, *args, **kwargs)
+    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)


 def orderedSet(iterable):
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2016.03.26'
+__version__ = '2016.03.27'
Author	SHA1	Message	Date
Philipp Hagemeister	a122e7080b	release 2016.03.27	2016-03-27 16:56:33 +02:00
Sergey M․	b22ca76204	[extractor/common] Filter out unsupported encrypted media for f4m formats (Closes #8573 )	2016-03-27 07:42:38 +06:00
Sergey M․	f7df343b4a	[downloader/f4m] Extract routine for removing unsupported encrypted media	2016-03-27 07:41:19 +06:00
Sergey M․	19dbaeece3	Remove _sort_formats from _extract_*_formats methods Now _sort_formats should be called explicitly. _sort_formats has been added to all the necessary places in code. Closes #8051	2016-03-27 07:03:08 +06:00
Yen Chi Hsuan	395fd4b08a	[twitter] Handle another form of embedded Vine Fixes #8996	2016-03-27 04:36:02 +08:00
Sergey M․	8018028d0f	[pluralsight] Extract chapter metadata (Closes #8993 )	2016-03-27 02:10:52 +06:00
Sergey M․	00322ad4fd	[lynda] Extract chapter metadata (#8993 )	2016-03-27 02:00:36 +06:00
Sergey M․	4cf3489c6e	[vevo] Update videoservice API URL (Closes #8900 )	2016-03-27 01:11:11 +06:00
Sergey M․	b24ab3e341	[udemy] Improve paid course detection	2016-03-27 00:09:12 +06:00
Sergey M․	af4116f4f0	[udemy] Improve format_id	2016-03-27 00:02:52 +06:00
Sergey M․	f973e5d54e	[udemy] Drop outputs' formats Always results in 403	2016-03-26 23:55:07 +06:00
Sergey M․	62f55aa68a	[udemy] Add outputs metadata to view_html formats	2016-03-26 23:54:12 +06:00
Sergey M․	02d7634d24	[udemy] Fix outputs' formats format_id	2016-03-26 23:43:25 +06:00
Sergey M․	48dce58ca9	[udemy] Use custom sorting	2016-03-26 23:42:46 +06:00
Sergey M․	efcba804f6	[udemy] Extract formats from view_html (Closes #8979 )	2016-03-26 23:42:34 +06:00
Sergey M․	6dee688e6d	[youtube:playlistsbase] Restrict playlist regex (Closes #8986 )	2016-03-26 20:42:18 +06:00
Sergey M․	eedb7ba536	[YoutubeDL] Sort imports	2016-03-26 19:40:33 +06:00
Sergey M․	dcf77cf1a7	[YoutubeDL] Sanitize final URLs (Closes #8991 )	2016-03-26 19:37:41 +06:00
Sergey M․	17bcc626bf	[utils] Extract sanitize_url routine	2016-03-26 19:33:57 +06:00
Sergey M․	b5a5bbf376	[mailru] Extend _VALID_URL (Closes #8990 )	2016-03-26 19:15:32 +06:00
Yen Chi Hsuan	e68d3a010f	[twitter] Fix extraction (closes #8966 ) HLS and DASH formats are no longer appeared in test cases. I keep them for fear of triggering new errors.	2016-03-26 18:34:51 +08:00
Yen Chi Hsuan	d10fe8358c	[generic] Add a test case for brightcove embed Closes #8862	2016-03-26 18:30:43 +08:00
Yen Chi Hsuan	d6c340cae5	[brightcove] Extract more formats (#8862 )	2016-03-26 18:21:07 +08:00
Yen Chi Hsuan	5964b598ff	[brightcove] Support alternative BrightcoveExperience layout The full URL lays in the `data` attribute of <object> (#8862)	2016-03-26 17:47:32 +08:00