1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2026-05-10 23:53:44 +00:00

Compare commits

...

24 Commits

Author SHA1 Message Date
Philipp Hagemeister a122e7080b release 2016.03.27 2016-03-27 16:56:33 +02:00
Sergey M․ b22ca76204 [extractor/common] Filter out unsupported encrypted media for f4m formats (Closes #8573) 2016-03-27 07:42:38 +06:00
Sergey M․ f7df343b4a [downloader/f4m] Extract routine for removing unsupported encrypted media 2016-03-27 07:41:19 +06:00
Sergey M․ 19dbaeece3 Remove _sort_formats from _extract_*_formats methods
Now _sort_formats should be called explicitly.
_sort_formats has been added to all the necessary places in code.

Closes #8051
2016-03-27 07:03:08 +06:00
Yen Chi Hsuan 395fd4b08a [twitter] Handle another form of embedded Vine
Fixes #8996
2016-03-27 04:36:02 +08:00
Sergey M․ 8018028d0f [pluralsight] Extract chapter metadata (Closes #8993) 2016-03-27 02:10:52 +06:00
Sergey M․ 00322ad4fd [lynda] Extract chapter metadata (#8993) 2016-03-27 02:00:36 +06:00
Sergey M․ 4cf3489c6e [vevo] Update videoservice API URL (Closes #8900) 2016-03-27 01:11:11 +06:00
Sergey M․ b24ab3e341 [udemy] Improve paid course detection 2016-03-27 00:09:12 +06:00
Sergey M․ af4116f4f0 [udemy] Improve format_id 2016-03-27 00:02:52 +06:00
Sergey M․ f973e5d54e [udemy] Drop outputs' formats
Always results in 403
2016-03-26 23:55:07 +06:00
Sergey M․ 62f55aa68a [udemy] Add outputs metadata to view_html formats 2016-03-26 23:54:12 +06:00
Sergey M․ 02d7634d24 [udemy] Fix outputs' formats format_id 2016-03-26 23:43:25 +06:00
Sergey M․ 48dce58ca9 [udemy] Use custom sorting 2016-03-26 23:42:46 +06:00
Sergey M․ efcba804f6 [udemy] Extract formats from view_html (Closes #8979) 2016-03-26 23:42:34 +06:00
Sergey M․ 6dee688e6d [youtube:playlistsbase] Restrict playlist regex (Closes #8986) 2016-03-26 20:42:18 +06:00
Sergey M․ eedb7ba536 [YoutubeDL] Sort imports 2016-03-26 19:40:33 +06:00
Sergey M․ dcf77cf1a7 [YoutubeDL] Sanitize final URLs (Closes #8991) 2016-03-26 19:37:41 +06:00
Sergey M․ 17bcc626bf [utils] Extract sanitize_url routine 2016-03-26 19:33:57 +06:00
Sergey M․ b5a5bbf376 [mailru] Extend _VALID_URL (Closes #8990) 2016-03-26 19:15:32 +06:00
Yen Chi Hsuan e68d3a010f [twitter] Fix extraction (closes #8966)
HLS and DASH formats no longer appear in test cases. I keep them
for fear of triggering new errors.
2016-03-26 18:34:51 +08:00
Yen Chi Hsuan d10fe8358c [generic] Add a test case for brightcove embed
Closes #8862
2016-03-26 18:30:43 +08:00
Yen Chi Hsuan d6c340cae5 [brightcove] Extract more formats (#8862) 2016-03-26 18:21:07 +08:00
Yen Chi Hsuan 5964b598ff [brightcove] Support alternative BrightcoveExperience layout
The full URL lies in the `data` attribute of <object> (#8862)
2016-03-26 17:47:32 +08:00
41 changed files with 223 additions and 78 deletions
+10 -5
View File
@@ -39,6 +39,8 @@ from .compat import (
compat_urllib_request_DataHandler,
)
from .utils import (
age_restricted,
args_to_str,
ContentTooShortError,
date_from_str,
DateRange,
@@ -58,13 +60,16 @@ from .utils import (
PagedList,
parse_filesize,
PerRequestProxyHandler,
PostProcessingError,
platform_name,
PostProcessingError,
preferredencoding,
prepend_extension,
render_table,
replace_extension,
SameFileError,
sanitize_filename,
sanitize_path,
sanitize_url,
sanitized_Request,
std_headers,
subtitles_filename,
@@ -75,10 +80,6 @@ from .utils import (
write_string,
YoutubeDLCookieProcessor,
YoutubeDLHandler,
prepend_extension,
replace_extension,
args_to_str,
age_restricted,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractors
@@ -1229,6 +1230,7 @@ class YoutubeDL(object):
t.get('preference'), t.get('width'), t.get('height'),
t.get('id'), t.get('url')))
for i, t in enumerate(thumbnails):
t['url'] = sanitize_url(t['url'])
if t.get('width') and t.get('height'):
t['resolution'] = '%dx%d' % (t['width'], t['height'])
if t.get('id') is None:
@@ -1263,6 +1265,7 @@ class YoutubeDL(object):
if subtitles:
for _, subtitle in subtitles.items():
for subtitle_format in subtitle:
subtitle_format['url'] = sanitize_url(subtitle_format['url'])
if 'ext' not in subtitle_format:
subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
@@ -1292,6 +1295,8 @@ class YoutubeDL(object):
if 'url' not in format:
raise ExtractorError('Missing "url" key in result (index %d)' % i)
format['url'] = sanitize_url(format['url'])
if format.get('format_id') is None:
format['format_id'] = compat_str(i)
else:
+7 -3
View File
@@ -223,6 +223,12 @@ def write_metadata_tag(stream, metadata):
write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
def remove_encrypted_media(media):
return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
'drmAdditionalHeaderSetId' not in e.attrib,
media))
def _add_ns(prop):
return '{http://ns.adobe.com/f4m/1.0}%s' % prop
@@ -244,9 +250,7 @@ class F4mFD(FragmentFD):
# without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
if 'id' not in e.attrib:
self.report_error('Missing ID in f4m DRM')
media = list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
'drmAdditionalHeaderSetId' not in e.attrib,
media))
media = remove_encrypted_media(media)
if not media:
self.report_error('Unsupported DRM')
return media
+1
View File
@@ -44,6 +44,7 @@ class Abc7NewsIE(InfoExtractor):
'contentURL', webpage, 'm3u8 url', fatal=True)
formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
self._sort_formats(formats)
title = self._og_search_title(webpage).strip()
description = self._og_search_description(webpage).strip()
+1
View File
@@ -120,6 +120,7 @@ class AzubuLiveIE(InfoExtractor):
bc_info = self._download_json(req, user)
m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS')
formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4')
self._sort_formats(formats)
return {
'id': info['id'],
+1
View File
@@ -94,6 +94,7 @@ class BetIE(InfoExtractor):
xpath_with_ns('./media:thumbnail', NS_MAP)).get('url')
formats = self._extract_smil_formats(smil_url, display_id)
self._sort_formats(formats)
return {
'id': video_id,
+17 -4
View File
@@ -136,13 +136,16 @@ class BrightcoveLegacyIE(InfoExtractor):
else:
flashvars = {}
data_url = object_doc.attrib.get('data', '')
data_url_params = compat_parse_qs(compat_urllib_parse_urlparse(data_url).query)
def find_param(name):
if name in flashvars:
return flashvars[name]
node = find_xpath_attr(object_doc, './param', 'name', name)
if node is not None:
return node.attrib['value']
return None
return data_url_params.get(name)
params = {}
@@ -294,7 +297,7 @@ class BrightcoveLegacyIE(InfoExtractor):
'uploader': video_info.get('publisherName'),
}
renditions = video_info.get('renditions')
renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', [])
if renditions:
formats = []
for rend in renditions:
@@ -316,13 +319,23 @@ class BrightcoveLegacyIE(InfoExtractor):
if ext is None:
ext = determine_ext(url)
size = rend.get('size')
formats.append({
a_format = {
'url': url,
'ext': ext,
'height': rend.get('frameHeight'),
'width': rend.get('frameWidth'),
'filesize': size if size != 0 else None,
})
}
# m3u8 manifests with remote == false are media playlists
# Not calling _extract_m3u8_formats here to save network traffic
if ext == 'm3u8':
a_format.update({
'ext': 'mp4',
'protocol': 'm3u8',
})
formats.append(a_format)
self._sort_formats(formats)
info['formats'] = formats
elif video_info.get('FLVFullLengthURL') is not None:
+1
View File
@@ -122,6 +122,7 @@ class CBSNewsLiveVideoIE(InfoExtractor):
for entry in f4m_formats:
# URLs without the extra param induce a 404 error
entry.update({'extra_param_to_segment_url': hdcore_sign})
self._sort_formats(f4m_formats)
return {
'id': video_id,
+1
View File
@@ -48,6 +48,7 @@ class ChaturbateIE(InfoExtractor):
raise ExtractorError('Unable to find stream URL')
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
self._sort_formats(formats)
return {
'id': video_id,
+6 -6
View File
@@ -24,6 +24,7 @@ from ..compat import (
compat_urllib_parse_urlencode,
compat_urlparse,
)
from ..downloader.f4m import remove_encrypted_media
from ..utils import (
NO_DEFAULT,
age_restricted,
@@ -989,6 +990,11 @@ class InfoExtractor(object):
if not media_nodes:
manifest_version = '2.0'
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
# Remove unsupported DRM protected media from final formats
# rendition (see https://github.com/rg3/youtube-dl/issues/8573).
media_nodes = remove_encrypted_media(media_nodes)
if not media_nodes:
return formats
base_url = xpath_text(
manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
'base URL', default=None)
@@ -1021,8 +1027,6 @@ class InfoExtractor(object):
'height': int_or_none(media_el.attrib.get('height')),
'preference': preference,
})
self._sort_formats(formats)
return formats
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
@@ -1143,7 +1147,6 @@ class InfoExtractor(object):
last_media = None
formats.append(f)
last_info = {}
self._sort_formats(formats)
return formats
@staticmethod
@@ -1317,8 +1320,6 @@ class InfoExtractor(object):
})
continue
self._sort_formats(formats)
return formats
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
@@ -1536,7 +1537,6 @@ class InfoExtractor(object):
existing_format.update(f)
else:
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
self._sort_formats(formats)
return formats
def _live_title(self, name):
+1
View File
@@ -57,6 +57,7 @@ class CWTVIE(InfoExtractor):
formats = self._extract_m3u8_formats(
video_data['videos']['variantplaylist']['uri'], video_id, 'mp4')
self._sort_formats(formats)
thumbnails = [{
'url': image['uri'],
+1
View File
@@ -38,6 +38,7 @@ class DFBIE(InfoExtractor):
token_el = f4m_info.find('token')
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
formats = self._extract_f4m_formats(manifest_url, display_id)
self._sort_formats(formats)
return {
'id': video_id,
+17 -12
View File
@@ -63,18 +63,23 @@ class DiscoveryIE(InfoExtractor):
video_title = info.get('playlist_title') or info.get('video_title')
entries = [{
'id': compat_str(video_info['id']),
'formats': self._extract_m3u8_formats(
entries = []
for idx, video_info in enumerate(info['playlist']):
formats = self._extract_m3u8_formats(
video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls',
note='Download m3u8 information for video %d' % (idx + 1)),
'title': video_info['title'],
'description': video_info.get('description'),
'duration': parse_duration(video_info.get('video_length')),
'webpage_url': video_info.get('href') or video_info.get('url'),
'thumbnail': video_info.get('thumbnailURL'),
'alt_title': video_info.get('secondary_title'),
'timestamp': parse_iso8601(video_info.get('publishedDate')),
} for idx, video_info in enumerate(info['playlist'])]
note='Download m3u8 information for video %d' % (idx + 1))
self._sort_formats(formats)
entries.append({
'id': compat_str(video_info['id']),
'formats': formats,
'title': video_info['title'],
'description': video_info.get('description'),
'duration': parse_duration(video_info.get('video_length')),
'webpage_url': video_info.get('href') or video_info.get('url'),
'thumbnail': video_info.get('thumbnailURL'),
'alt_title': video_info.get('secondary_title'),
'timestamp': parse_iso8601(video_info.get('publishedDate')),
})
return self.playlist_result(entries, display_id, video_title)
+2
View File
@@ -118,6 +118,8 @@ class DPlayIE(InfoExtractor):
if info.get(protocol):
extract_formats(protocol, info[protocol])
self._sort_formats(formats)
return {
'id': video_id,
'display_id': display_id,
+1 -1
View File
@@ -39,13 +39,13 @@ class DWIE(InfoExtractor):
hidden_inputs = self._hidden_inputs(webpage)
title = hidden_inputs['media_title']
formats = []
if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
formats = self._extract_smil_formats(
'http://www.dw.com/smil/v-%s' % media_id, media_id,
transform_source=lambda s: s.replace(
'rtmp://tv-od.dw.de/flash/',
'http://tv-download.dw.de/dwtv_video/flv/'))
self._sort_formats(formats)
else:
formats = [{'url': hidden_inputs['file_name']}]
+27 -2
View File
@@ -1124,7 +1124,23 @@ class GenericIE(InfoExtractor):
# m3u8 downloads
'skip_download': True,
}
}
},
# Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
# This video can't be played in browsers if Flash is disabled and the UA is set to iPhone, which is actually a false alarm
{
'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
'info_dict': {
'id': '4785848093001',
'ext': 'mp4',
'title': 'The Cardinal Pell Interview',
'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
'uploader': 'GlobeCast Australia - GlobeStream',
},
'params': {
# m3u8 downloads
'skip_download': True,
},
},
]
def report_following_redirect(self, new_url):
@@ -1294,6 +1310,7 @@ class GenericIE(InfoExtractor):
'vcodec': 'none' if m.group('type') == 'audio' else None
}]
info_dict['direct'] = True
self._sort_formats(formats)
info_dict['formats'] = formats
return info_dict
@@ -1320,6 +1337,7 @@ class GenericIE(InfoExtractor):
# Is it an M3U playlist?
if first_bytes.startswith(b'#EXTM3U'):
info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
self._sort_formats(info_dict['formats'])
return info_dict
# Maybe it's a direct link to a video?
@@ -1344,15 +1362,19 @@ class GenericIE(InfoExtractor):
if doc.tag == 'rss':
return self._extract_rss(url, video_id, doc)
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
return self._parse_smil(doc, url, video_id)
smil = self._parse_smil(doc, url, video_id)
self._sort_formats(smil['formats'])
return smil
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'] = self._parse_mpd_formats(
doc, video_id, mpd_base_url=url.rpartition('/')[0])
self._sort_formats(info_dict['formats'])
return info_dict
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
self._sort_formats(info_dict['formats'])
return info_dict
except compat_xml_parse_error:
pass
@@ -2037,6 +2059,9 @@ class GenericIE(InfoExtractor):
else:
entry_info_dict['url'] = video_url
if entry_info_dict.get('formats'):
self._sort_formats(entry_info_dict['formats'])
entries.append(entry_info_dict)
if len(entries) == 1:
+1
View File
@@ -130,6 +130,7 @@ class Laola1TvIE(InfoExtractor):
formats = self._extract_f4m_formats(
'%s?hdnea=%s&hdcore=3.2.0' % (token_attrib['url'], token_auth),
video_id, f4m_id='hds')
self._sort_formats(formats)
categories_str = _v('meta_sports')
categories = categories_str.split(',') if categories_str else []
+1
View File
@@ -37,6 +37,7 @@ class LRTIE(InfoExtractor):
r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*location\.hash\.substring\(1\)',
webpage, 'm3u8 url', group='url')
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
self._sort_formats(formats)
thumbnail = self._og_search_thumbnail(webpage)
description = self._og_search_description(webpage)
+11 -9
View File
@@ -219,7 +219,7 @@ class LyndaCourseIE(LyndaBaseIE):
'Course %s does not exist' % course_id, expected=True)
unaccessible_videos = 0
videos = []
entries = []
# Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
# by single video API anymore
@@ -229,20 +229,22 @@ class LyndaCourseIE(LyndaBaseIE):
if video.get('HasAccess') is False:
unaccessible_videos += 1
continue
if video.get('ID'):
videos.append(video['ID'])
video_id = video.get('ID')
if video_id:
entries.append({
'_type': 'url_transparent',
'url': 'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
'ie_key': LyndaIE.ie_key(),
'chapter': chapter.get('Title'),
'chapter_number': int_or_none(chapter.get('ChapterIndex')),
'chapter_id': compat_str(chapter.get('ID')),
})
if unaccessible_videos > 0:
self._downloader.report_warning(
'%s videos are only available for members (or paid members) and will not be downloaded. '
% unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)
entries = [
self.url_result(
'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
'Lynda')
for video_id in videos]
course_title = course.get('Title')
return self.playlist_result(entries, course_id, course_title)
+5 -1
View File
@@ -13,7 +13,7 @@ from ..utils import (
class MailRuIE(InfoExtractor):
IE_NAME = 'mailru'
IE_DESC = 'Видео@Mail.Ru'
_VALID_URL = r'https?://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
_VALID_URL = r'https?://(?:(?:www|m)\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
_TESTS = [
{
@@ -61,6 +61,10 @@ class MailRuIE(InfoExtractor):
'duration': 6001,
},
'skip': 'Not accessible from Travis CI server',
},
{
'url': 'http://m.my.mail.ru/mail/3sktvtr/video/_myvideo/138.html',
'only_matching': True,
}
]
+1
View File
@@ -47,6 +47,7 @@ class MatchTVIE(InfoExtractor):
video_url = self._download_json(request, video_id)['data']['videoUrl']
f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
formats = self._extract_f4m_formats(f4m_url, video_id)
self._sort_formats(formats)
return {
'id': video_id,
'title': self._live_title('Матч ТВ - Прямой эфир'),
+1
View File
@@ -67,6 +67,7 @@ class MiTeleIE(InfoExtractor):
formats.extend(self._extract_f4m_formats(
file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
display_id, f4m_id=loc))
self._sort_formats(formats)
title = self._search_regex(
r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title')
+1
View File
@@ -63,6 +63,7 @@ class NRKIE(InfoExtractor):
if determine_ext(media_url) == 'f4m':
formats = self._extract_f4m_formats(
media_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id, f4m_id='hds')
self._sort_formats(formats)
else:
formats = [{
'url': media_url,
+9 -4
View File
@@ -279,13 +279,18 @@ class PluralsightCourseIE(PluralsightBaseIE):
course_id, 'Downloading course data JSON')
entries = []
for module in course_data:
for num, module in enumerate(course_data, 1):
for clip in module.get('clips', []):
player_parameters = clip.get('playerParameters')
if not player_parameters:
continue
entries.append(self.url_result(
'%s/training/player?%s' % (self._API_BASE, player_parameters),
'Pluralsight'))
entries.append({
'_type': 'url_transparent',
'url': '%s/training/player?%s' % (self._API_BASE, player_parameters),
'ie_key': PluralsightIE.ie_key(),
'chapter': module.get('title'),
'chapter_number': num,
'chapter_id': module.get('moduleRef'),
})
return self.playlist_result(entries, course_id, title, description)
+1
View File
@@ -31,6 +31,7 @@ class RestudyIE(InfoExtractor):
formats = self._extract_smil_formats(
'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' % video_id,
video_id)
self._sort_formats(formats)
return {
'id': video_id,
+1
View File
@@ -49,6 +49,7 @@ class RteIE(InfoExtractor):
# f4m_url = server + relative_url
f4m_url = json_string['shows'][0]['media:group'][0]['rte:server'] + json_string['shows'][0]['media:group'][0]['url']
f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
self._sort_formats(f4m_formats)
return {
'id': video_id,
+1
View File
@@ -209,6 +209,7 @@ class RTVELiveIE(InfoExtractor):
png = self._download_webpage(png_url, video_id, 'Downloading url information')
m3u8_url = _decrypt_url(png)
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
self._sort_formats(formats)
return {
'id': video_id,
+1
View File
@@ -38,6 +38,7 @@ class RTVNHIE(InfoExtractor):
item['file'], video_id, ext='mp4', entry_protocol='m3u8_native'))
elif item.get('type') == '':
formats.append({'url': item['file']})
self._sort_formats(formats)
return {
'id': video_id,
+1
View File
@@ -77,6 +77,7 @@ class ShahidIE(InfoExtractor):
raise ExtractorError('This video is DRM protected.', expected=True)
formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
self._sort_formats(formats)
video = self._download_json(
'%s/%s/%s?%s' % (
+1
View File
@@ -99,6 +99,7 @@ class SportBoxEmbedIE(InfoExtractor):
webpage, 'hls file')
formats = self._extract_m3u8_formats(hls, video_id, 'mp4')
self._sort_formats(formats)
title = self._search_regex(
r'sportboxPlayer\.node_title\s*=\s*"([^"]+)"', webpage, 'title')
+1
View File
@@ -82,6 +82,7 @@ class TelecincoIE(InfoExtractor):
)
formats = self._extract_m3u8_formats(
token_info['tokenizedUrl'], episode, ext='mp4', entry_protocol='m3u8_native')
self._sort_formats(formats)
return {
'id': embed_data['videoId'],
+1
View File
@@ -69,6 +69,7 @@ class TubiTvIE(InfoExtractor):
apu = self._search_regex(r"apu='([^']+)'", webpage, 'apu')
m3u8_url = codecs.decode(apu, 'rot_13')[::-1]
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
self._sort_formats(formats)
return {
'id': video_id,
+17 -4
View File
@@ -102,6 +102,9 @@ class TwitterCardIE(TwitterBaseIE):
r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'),
video_id)
if config.get('source_type') == 'vine':
return self.url_result(config['player_url'], 'Vine')
def _search_dimensions_in_video_url(a_format, video_url):
m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
if m:
@@ -110,10 +113,9 @@ class TwitterCardIE(TwitterBaseIE):
'height': int(m.group('height')),
})
playlist = config.get('playlist')
if playlist:
video_url = playlist[0]['source']
video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source')
if video_url:
f = {
'url': video_url,
}
@@ -185,7 +187,6 @@ class TwitterIE(InfoExtractor):
'ext': 'mp4',
'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 12.922,
'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"',
'uploader': 'FREE THE NIPPLE',
'uploader_id': 'freethenipple',
@@ -247,6 +248,18 @@ class TwitterIE(InfoExtractor):
'params': {
'skip_download': True, # requires ffmpeg
},
}, {
'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
'md5': '89a15ed345d13b86e9a5a5e051fa308a',
'info_dict': {
'id': 'MIOxnrUteUd',
'ext': 'mp4',
'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
'uploader': 'TAKUMA',
'uploader_id': '1004126642786242560',
'upload_date': '20140615',
},
'add_ie': ['Vine'],
}]
def _real_extract(self, url):
+58 -21
View File
@@ -1,5 +1,7 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
@@ -8,6 +10,8 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
determine_ext,
extract_attributes,
ExtractorError,
float_or_none,
int_or_none,
@@ -51,21 +55,26 @@ class UdemyIE(InfoExtractor):
}]
def _enroll_course(self, base_url, webpage, course_id):
def combine_url(base_url, url):
return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url
checkout_url = unescapeHTML(self._search_regex(
r'href=(["\'])(?P<url>https?://(?:www\.)?udemy\.com/payment/checkout/.+?)\1',
r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/payment/checkout/.+?)\1',
webpage, 'checkout url', group='url', default=None))
if checkout_url:
raise ExtractorError(
'Course %s is not free. You have to pay for it before you can download. '
'Use this URL to confirm purchase: %s' % (course_id, checkout_url), expected=True)
'Use this URL to confirm purchase: %s'
% (course_id, combine_url(base_url, checkout_url)),
expected=True)
enroll_url = unescapeHTML(self._search_regex(
r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/course/subscribe/.+?)\1',
webpage, 'enroll url', group='url', default=None))
if enroll_url:
if not enroll_url.startswith('http'):
enroll_url = compat_urlparse.urljoin(base_url, enroll_url)
webpage = self._download_webpage(enroll_url, course_id, 'Enrolling in the course')
webpage = self._download_webpage(
combine_url(base_url, enroll_url),
course_id, 'Enrolling in the course')
if '>You have enrolled in' in webpage:
self.to_screen('%s: Successfully enrolled in the course' % course_id)
@@ -73,11 +82,8 @@ class UdemyIE(InfoExtractor):
return self._download_json(
'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?%s' % (
course_id, lecture_id, compat_urllib_parse_urlencode({
'video_only': '',
'auto_play': '',
'fields[lecture]': 'title,description,asset',
'fields[lecture]': 'title,description,view_html,asset',
'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,data',
'instructorPreviewMode': 'False',
})),
lecture_id, 'Downloading lecture JSON')
@@ -200,7 +206,7 @@ class UdemyIE(InfoExtractor):
def extract_output_format(src):
return {
'url': src['url'],
'format_id': '%sp' % (src.get('label') or format_id),
'format_id': '%sp' % (src.get('height') or format_id),
'width': int_or_none(src.get('width')),
'height': int_or_none(src.get('height')),
'vbr': int_or_none(src.get('video_bitrate_in_kbps')),
@@ -217,9 +223,13 @@ class UdemyIE(InfoExtractor):
if not isinstance(outputs, dict):
outputs = {}
for format_id, output in outputs.items():
if isinstance(output, dict) and output.get('url'):
formats.append(extract_output_format(output))
def add_output_format_meta(f, key):
output = outputs.get(key)
if isinstance(output, dict):
output_format = extract_output_format(output)
output_format.update(f)
return output_format
return f
download_urls = asset.get('download_urls')
if isinstance(download_urls, dict):
@@ -232,21 +242,48 @@ class UdemyIE(InfoExtractor):
format_id = format_.get('label')
f = {
'url': format_['file'],
'format_id': '%sp' % format_id,
'height': int_or_none(format_id),
}
if format_id:
# Some videos contain additional metadata (e.g.
# https://www.udemy.com/ios9-swift/learn/#/lecture/3383208)
output = outputs.get(format_id)
if isinstance(output, dict):
output_format = extract_output_format(output)
output_format.update(f)
f = output_format
else:
f['format_id'] = '%sp' % format_id
f = add_output_format_meta(f, format_id)
formats.append(f)
self._sort_formats(formats)
view_html = lecture.get('view_html')
if view_html:
view_html_urls = set()
for source in re.findall(r'<source[^>]+>', view_html):
attributes = extract_attributes(source)
src = attributes.get('src')
if not src:
continue
res = attributes.get('data-res')
height = int_or_none(res)
if src in view_html_urls:
continue
view_html_urls.add(src)
if attributes.get('type') == 'application/x-mpegURL' or determine_ext(src) == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
src, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)
for f in m3u8_formats:
m = re.search(r'/hls_(?P<height>\d{3,4})_(?P<tbr>\d{2,})/', f['url'])
if m:
if not f.get('height'):
f['height'] = int(m.group('height'))
if not f.get('tbr'):
f['tbr'] = int(m.group('tbr'))
formats.extend(m3u8_formats)
else:
formats.append(add_output_format_meta({
'url': src,
'format_id': '%dp' % height if height else None,
'height': height,
}, res))
self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
return {
'id': video_id,
+1 -1
View File
@@ -152,7 +152,7 @@ class VevoIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
json_url = 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
response = self._download_json(
json_url, video_id, 'Downloading video info', 'Unable to download info')
video_info = response.get('video') or {}
+1
View File
@@ -111,6 +111,7 @@ class VideomoreIE(InfoExtractor):
video_url = xpath_text(video, './/video_url', 'video url', fatal=True)
formats = self._extract_f4m_formats(video_url, video_id, f4m_id='hds')
self._sort_formats(formats)
data = self._download_json(
'http://videomore.ru/video/tracks/%s.json' % video_id,
+1
View File
@@ -50,6 +50,7 @@ class VierIE(InfoExtractor):
playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)
formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')
self._sort_formats(formats)
title = self._og_search_title(webpage, default=display_id)
description = self._og_search_description(webpage, default=None)
+1
View File
@@ -151,6 +151,7 @@ class ViideaIE(InfoExtractor):
smil_url = '%s/%s/video/%s/smil.xml' % (base_url, lecture_slug, part_id)
smil = self._download_smil(smil_url, lecture_id)
info = self._parse_smil(smil, smil_url, lecture_id)
self._sort_formats(info['formats'])
info['id'] = lecture_id if not multipart else '%s_part%s' % (lecture_id, part_id)
info['display_id'] = lecture_slug if not multipart else '%s_part%s' % (lecture_slug, part_id)
if multipart:
+3 -1
View File
@@ -41,10 +41,12 @@ class YnetIE(InfoExtractor):
m = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title)
if m:
title = m.group('title')
formats = self._extract_f4m_formats(f4m_url, video_id)
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'formats': self._extract_f4m_formats(f4m_url, video_id),
'formats': formats,
'thumbnail': self._og_search_thumbnail(webpage),
}
+3 -1
View File
@@ -234,7 +234,9 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
def _process_page(self, content):
for playlist_id in orderedSet(re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)):
for playlist_id in orderedSet(re.findall(
r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
content)):
yield self.url_result(
'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
+5 -2
View File
@@ -417,9 +417,12 @@ def sanitize_path(s):
# Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of
# unwanted failures due to missing protocol
def sanitize_url(url):
return 'http:%s' % url if url.startswith('//') else url
def sanitized_Request(url, *args, **kwargs):
return compat_urllib_request.Request(
'http:%s' % url if url.startswith('//') else url, *args, **kwargs)
return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
def orderedSet(iterable):
+1 -1
View File
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2016.03.26'
__version__ = '2016.03.27'