1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2026-05-13 00:53:28 +00:00

Compare commits

..

21 Commits

Author SHA1 Message Date
Sergey M․ 3acff9423d release 2016.09.18 2016-09-18 17:16:55 +07:00
Sergey M․ 9ca93b99d1 [ChangeLog] Actualize 2016-09-18 17:15:22 +07:00
Sergey M․ 14ae11efab [vyborymos] Add extractor (Closes #10692) 2016-09-18 16:56:40 +07:00
Sergey M․ 190d2027d0 [xfileshare] Add title regex for streamin.to and fallback to video id (Closes #10646) 2016-09-18 07:22:06 +07:00
Sergey M․ 26394d021d [globo:article] Add support for multiple videos (Closes #10653) 2016-09-17 23:34:10 +07:00
Sergey M․ 30d0b549be [extractor/common] Add manifest_url for hls and hds formats 2016-09-17 21:33:38 +07:00
Sergey M․ 86f4d14f81 Refactor fragments interface and dash segments downloader
- Eliminate segment_urls and initialization_url
+ Introduce manifest_url (manifest may contain unfragmented data in this case url will be used for direct media URL and manifest_url for manifest itself correspondingly)
* Rewrite dashsegments downloader to use fragments data
* Improve generic mpd extraction
2016-09-17 20:35:22 +07:00
Sergey M․ 21d21b0c72 [svt] Fix DASH formats extraction 2016-09-17 19:25:31 +07:00
Sergey M․ b4c1d6e800 [extractor/common] Expose fragments interface for dashsegments formats 2016-09-17 18:31:18 +07:00
Sergey M․ a0d5077c8d [extractor/common] Introduce fragments interface 2016-09-17 18:31:09 +07:00
Yen Chi Hsuan 584d6f3457 [thisav] Recognize jwplayers (closes #10447) 2016-09-17 18:46:43 +08:00
Yen Chi Hsuan e14c82bd6b [jwplatform] Use js_to_json to detect more JWPlayers 2016-09-17 18:45:08 +08:00
Sergey M․ c51a7f0b2f [franceinter] Fix upload date extraction 2016-09-17 15:44:37 +07:00
Remita Amine d05ef09d9d [mangomolo] fix domain regex 2016-09-17 08:11:01 +01:00
Remita Amine 30d9e20938 [postprocessor/ffmpeg] apply FFmpegFixupM3u8PP only for videos with aac codec(#5591) 2016-09-16 22:06:55 +01:00
Remita Amine fc86d4eed0 [mangomolo] fix typo 2016-09-16 20:10:47 +01:00
Remita Amine 7d273a387a [mangomolo] add support for Mangomolo embeds 2016-09-16 19:31:39 +01:00
Remita Amine 6ad0219556 [common] add helper method for Wowza Streaming Engine format extraction 2016-09-16 19:30:38 +01:00
Remita Amine 98b7506e96 [toutv] add support for authentication(closes #10669) 2016-09-16 17:40:15 +01:00
Sergey M․ 52dc8a9b3f [franceinter] Fix upload date extraction 2016-09-16 22:02:59 +07:00
Sergey M․ 9d8985a165 [tv4] Fix hls and hds formats (Closes #10659) 2016-09-16 00:54:34 +07:00
24 changed files with 610 additions and 290 deletions
+3 -3
View File
@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.15**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.18**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.09.15
[debug] youtube-dl version 2016.09.18
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
+22
View File
@@ -1,3 +1,25 @@
version 2016.09.18
Core
+ Introduce manifest_url and fragments fields in formats dictionary for
fragmented media
+ Provide manifest_url field for DASH segments, HLS and HDS
+ Provide fragments field for DASH segments
* Rework DASH segments downloader to use fragments field
+ Add helper method for Wowza Streaming Engine formats extraction
Extractors
+ [vyborymos] Add extractor for vybory.mos.ru (#10692)
+ [xfileshare] Add title regular expression for streamin.to (#10646)
+ [globo:article] Add support for multiple videos (#10653)
+ [thisav] Recognize HTML5 videos (#10447)
* [jwplatform] Improve JWPlayer detection
+ [mangomolo] Add support for Mangomolo embeds
+ [toutv] Add support for authentication (#10669)
* [franceinter] Fix upload date extraction
* [tv4] Fix HLS and HDS formats extraction (#10659)
version 2016.09.15
Core
+3
View File
@@ -388,6 +388,8 @@
- **mailru**: Видео@Mail.Ru
- **MakersChannel**
- **MakerTV**
- **mangomolo:live**
- **mangomolo:video**
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
@@ -849,6 +851,7 @@
- **VRT**
- **vube**: Vube.com
- **VuClip**
- **VyboryMos**
- **Walla**
- **washingtonpost**
- **washingtonpost:article**
+12 -23
View File
@@ -1,7 +1,6 @@
from __future__ import unicode_literals
import os
import re
from .fragment import FragmentFD
from ..compat import compat_urllib_error
@@ -19,34 +18,32 @@ class DashSegmentsFD(FragmentFD):
FD_NAME = 'dashsegments'
def real_download(self, filename, info_dict):
base_url = info_dict['url']
segment_urls = [info_dict['segment_urls'][0]] if self.params.get('test', False) else info_dict['segment_urls']
initialization_url = info_dict.get('initialization_url')
segments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments']
ctx = {
'filename': filename,
'total_frags': len(segment_urls) + (1 if initialization_url else 0),
'total_frags': len(segments),
}
self._prepare_and_start_frag_download(ctx)
def combine_url(base_url, target_url):
if re.match(r'^https?://', target_url):
return target_url
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
segments_filenames = []
fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
def process_segment(segment, tmp_filename, fatal):
target_url, segment_name = segment
def process_segment(segment, tmp_filename, num):
segment_url = segment['url']
segment_name = 'Frag%d' % num
target_filename = '%s-%s' % (tmp_filename, segment_name)
# In DASH, the first segment contains necessary headers to
# generate a valid MP4 file, so always abort for the first segment
fatal = num == 0 or not skip_unavailable_fragments
count = 0
while count <= fragment_retries:
try:
success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)})
success = ctx['dl'].download(target_filename, {'url': segment_url})
if not success:
return False
down, target_sanitized = sanitize_open(target_filename, 'rb')
@@ -72,16 +69,8 @@ class DashSegmentsFD(FragmentFD):
return False
return True
segments_to_download = [(initialization_url, 'Init')] if initialization_url else []
segments_to_download.extend([
(segment_url, 'Seg%d' % i)
for i, segment_url in enumerate(segment_urls)])
for i, segment in enumerate(segments_to_download):
# In DASH, the first segment contains necessary headers to
# generate a valid MP4 file, so always abort for the first segment
fatal = i == 0 or not skip_unavailable_fragments
if not process_segment(segment, ctx['tmpfilename'], fatal):
for i, segment in enumerate(segments):
if not process_segment(segment, ctx['tmpfilename'], i):
return False
self._finish_frag_download(ctx)
+24 -39
View File
@@ -50,25 +50,6 @@ class AWAANBaseIE(InfoExtractor):
'is_live': is_live,
}
def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol):
formats = []
format_url_base = 'http' + self._html_search_regex(
[
r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
r'<a[^>]+href="rtsp(://[^"]+)"'
], webpage, 'format url')
formats.extend(self._extract_mpd_formats(
format_url_base + '/manifest.mpd',
video_id, mpd_id='dash', fatal=False))
formats.extend(self._extract_m3u8_formats(
format_url_base + '/playlist.m3u8', video_id, 'mp4',
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
format_url_base + '/manifest.f4m',
video_id, f4m_id='hds', fatal=False))
self._sort_formats(formats)
return formats
class AWAANVideoIE(AWAANBaseIE):
IE_NAME = 'awaan:video'
@@ -99,16 +80,18 @@ class AWAANVideoIE(AWAANBaseIE):
video_id, headers={'Origin': 'http://awaan.ae'})
info = self._parse_video_data(video_data, video_id, False)
webpage = self._download_webpage(
'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' +
compat_urllib_parse_urlencode({
'id': video_data['id'],
'user_id': video_data['user_id'],
'signature': video_data['signature'],
'countries': 'Q0M=',
'filter': 'DENY',
}), video_id)
info['formats'] = self._extract_video_formats(webpage, video_id, 'm3u8_native')
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({
'id': video_data['id'],
'user_id': video_data['user_id'],
'signature': video_data['signature'],
'countries': 'Q0M=',
'filter': 'DENY',
})
info.update({
'_type': 'url_transparent',
'url': embed_url,
'ie_key': 'MangomoloVideo',
})
return info
@@ -138,16 +121,18 @@ class AWAANLiveIE(AWAANBaseIE):
channel_id, headers={'Origin': 'http://awaan.ae'})
info = self._parse_video_data(channel_data, channel_id, True)
webpage = self._download_webpage(
'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' +
compat_urllib_parse_urlencode({
'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
'signature': channel_data['signature'],
'countries': 'Q0M=',
'filter': 'DENY',
}), channel_id)
info['formats'] = self._extract_video_formats(webpage, channel_id, 'm3u8')
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({
'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
'signature': channel_data['signature'],
'countries': 'Q0M=',
'filter': 'DENY',
})
info.update({
'_type': 'url_transparent',
'url': embed_url,
'ie_key': 'MangomoloLive',
})
return info
+158 -58
View File
@@ -87,6 +87,9 @@ class InfoExtractor(object):
Potential fields:
* url Mandatory. The URL of the video file
* manifest_url
The URL of the manifest file in case of
fragmented media (DASH, hls, hds)
* ext Will be calculated from URL if missing
* format A human-readable description of the format
("mp4 container with h264/opus").
@@ -115,6 +118,11 @@ class InfoExtractor(object):
download, lower-case.
"http", "https", "rtsp", "rtmp", "rtmpe",
"m3u8", "m3u8_native" or "http_dash_segments".
* fragments A list of fragments of the fragmented media,
with the following entries:
* "url" (mandatory) - fragment's URL
* "duration" (optional, int or float)
* "filesize" (optional, int)
* preference Order number of this format. If this field is
present and not None, the formats get sorted
by this field, regardless of all other values.
@@ -1142,6 +1150,7 @@ class InfoExtractor(object):
formats.append({
'format_id': format_id,
'url': manifest_url,
'manifest_url': manifest_url,
'ext': 'flv' if bootstrap_info is not None else None,
'tbr': tbr,
'width': width,
@@ -1247,9 +1256,11 @@ class InfoExtractor(object):
# format_id intact.
if not live:
format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
manifest_url = format_url(line.strip())
f = {
'format_id': '-'.join(format_id),
'url': format_url(line.strip()),
'url': manifest_url,
'manifest_url': manifest_url,
'tbr': tbr,
'ext': ext,
'fps': float_or_none(last_info.get('FRAME-RATE')),
@@ -1521,9 +1532,10 @@ class InfoExtractor(object):
mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group()
return self._parse_mpd_formats(
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
formats_dict=formats_dict, mpd_url=mpd_url)
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
"""
Parse formats from MPD manifest.
References:
@@ -1544,42 +1556,52 @@ class InfoExtractor(object):
def extract_multisegment_info(element, ms_parent_info):
ms_info = ms_parent_info.copy()
# As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
# common attributes and elements. We will only extract relevant
# for us.
def extract_common(source):
segment_timeline = source.find(_add_ns('SegmentTimeline'))
if segment_timeline is not None:
s_e = segment_timeline.findall(_add_ns('S'))
if s_e:
ms_info['total_number'] = 0
ms_info['s'] = []
for s in s_e:
r = int(s.get('r', 0))
ms_info['total_number'] += 1 + r
ms_info['s'].append({
't': int(s.get('t', 0)),
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
'd': int(s.attrib['d']),
'r': r,
})
start_number = source.get('startNumber')
if start_number:
ms_info['start_number'] = int(start_number)
timescale = source.get('timescale')
if timescale:
ms_info['timescale'] = int(timescale)
segment_duration = source.get('duration')
if segment_duration:
ms_info['segment_duration'] = int(segment_duration)
def extract_Initialization(source):
initialization = source.find(_add_ns('Initialization'))
if initialization is not None:
ms_info['initialization_url'] = initialization.attrib['sourceURL']
segment_list = element.find(_add_ns('SegmentList'))
if segment_list is not None:
extract_common(segment_list)
extract_Initialization(segment_list)
segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
if segment_urls_e:
ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
initialization = segment_list.find(_add_ns('Initialization'))
if initialization is not None:
ms_info['initialization_url'] = initialization.attrib['sourceURL']
else:
segment_template = element.find(_add_ns('SegmentTemplate'))
if segment_template is not None:
start_number = segment_template.get('startNumber')
if start_number:
ms_info['start_number'] = int(start_number)
segment_timeline = segment_template.find(_add_ns('SegmentTimeline'))
if segment_timeline is not None:
s_e = segment_timeline.findall(_add_ns('S'))
if s_e:
ms_info['total_number'] = 0
ms_info['s'] = []
for s in s_e:
r = int(s.get('r', 0))
ms_info['total_number'] += 1 + r
ms_info['s'].append({
't': int(s.get('t', 0)),
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
'd': int(s.attrib['d']),
'r': r,
})
else:
timescale = segment_template.get('timescale')
if timescale:
ms_info['timescale'] = int(timescale)
segment_duration = segment_template.get('duration')
if segment_duration:
ms_info['segment_duration'] = int(segment_duration)
extract_common(segment_template)
media_template = segment_template.get('media')
if media_template:
ms_info['media_template'] = media_template
@@ -1587,11 +1609,14 @@ class InfoExtractor(object):
if initialization:
ms_info['initialization_url'] = initialization
else:
initialization = segment_template.find(_add_ns('Initialization'))
if initialization is not None:
ms_info['initialization_url'] = initialization.attrib['sourceURL']
extract_Initialization(segment_template)
return ms_info
def combine_url(base_url, target_url):
if re.match(r'^https?://', target_url):
return target_url
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
formats = []
for period in mpd_doc.findall(_add_ns('Period')):
@@ -1634,6 +1659,7 @@ class InfoExtractor(object):
f = {
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
'url': base_url,
'manifest_url': mpd_url,
'ext': mimetype2ext(mime_type),
'width': int_or_none(representation_attrib.get('width')),
'height': int_or_none(representation_attrib.get('height')),
@@ -1648,9 +1674,7 @@ class InfoExtractor(object):
}
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
if 'total_number' not in representation_ms_info and 'segment_duration':
segment_duration = float(representation_ms_info['segment_duration']) / float(representation_ms_info['timescale'])
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
media_template = representation_ms_info['media_template']
media_template = media_template.replace('$RepresentationID$', representation_id)
media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
@@ -1659,46 +1683,79 @@ class InfoExtractor(object):
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
# can't be used at the same time
if '%(Number' in media_template:
representation_ms_info['segment_urls'] = [
media_template % {
if '%(Number' in media_template and 's' not in representation_ms_info:
segment_duration = None
if 'total_number' not in representation_ms_info and 'segment_duration':
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
representation_ms_info['fragments'] = [{
'url': media_template % {
'Number': segment_number,
'Bandwidth': representation_attrib.get('bandwidth'),
}
for segment_number in range(
representation_ms_info['start_number'],
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
},
'duration': segment_duration,
} for segment_number in range(
representation_ms_info['start_number'],
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
else:
representation_ms_info['segment_urls'] = []
# $Number*$ or $Time$ in media template with S list available
# Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
# Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
representation_ms_info['fragments'] = []
segment_time = 0
segment_d = None
segment_number = representation_ms_info['start_number']
def add_segment_url():
representation_ms_info['segment_urls'].append(
media_template % {
'Time': segment_time,
'Bandwidth': representation_attrib.get('bandwidth'),
}
)
segment_url = media_template % {
'Time': segment_time,
'Bandwidth': representation_attrib.get('bandwidth'),
'Number': segment_number,
}
representation_ms_info['fragments'].append({
'url': segment_url,
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
})
for num, s in enumerate(representation_ms_info['s']):
segment_time = s.get('t') or segment_time
segment_d = s['d']
add_segment_url()
segment_number += 1
for r in range(s.get('r', 0)):
segment_time += s['d']
segment_time += segment_d
add_segment_url()
segment_time += s['d']
if 'segment_urls' in representation_ms_info:
segment_number += 1
segment_time += segment_d
elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
# No media template
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
# or any YouTube dashsegments video
fragments = []
s_num = 0
for segment_url in representation_ms_info['segment_urls']:
s = representation_ms_info['s'][s_num]
for r in range(s.get('r', 0) + 1):
fragments.append({
'url': segment_url,
'duration': float_or_none(s['d'], representation_ms_info['timescale']),
})
representation_ms_info['fragments'] = fragments
# NB: MPD manifest may contain direct URLs to unfragmented media.
# No fragments key is present in this case.
if 'fragments' in representation_ms_info:
f.update({
'segment_urls': representation_ms_info['segment_urls'],
'fragments': [],
'protocol': 'http_dash_segments',
})
if 'initialization_url' in representation_ms_info:
initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
f.update({
'initialization_url': initialization_url,
})
if not f.get('url'):
f['url'] = initialization_url
f['fragments'].append({'url': initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
for fragment in f['fragments']:
fragment['url'] = combine_url(base_url, fragment['url'])
try:
existing_format = next(
fo for fo in formats
@@ -1795,6 +1852,49 @@ class InfoExtractor(object):
m3u8_id='hls', fatal=False))
return formats
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
url_base = self._search_regex(r'(?:https?|rtmp|rtsp)(://[^?]+)', url, 'format url')
http_base_url = 'http' + url_base
formats = []
if 'm3u8' not in skip_protocols:
formats.extend(self._extract_m3u8_formats(
http_base_url + '/playlist.m3u8', video_id, 'mp4',
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
if 'f4m' not in skip_protocols:
formats.extend(self._extract_f4m_formats(
http_base_url + '/manifest.f4m',
video_id, f4m_id='hds', fatal=False))
if re.search(r'(?:/smil:|\.smil)', url_base):
if 'dash' not in skip_protocols:
formats.extend(self._extract_mpd_formats(
http_base_url + '/manifest.mpd',
video_id, mpd_id='dash', fatal=False))
if 'smil' not in skip_protocols:
rtmp_formats = self._extract_smil_formats(
http_base_url + '/jwplayer.smil',
video_id, fatal=False)
for rtmp_format in rtmp_formats:
rtsp_format = rtmp_format.copy()
rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
del rtsp_format['play_path']
del rtsp_format['ext']
rtsp_format.update({
'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
'protocol': 'rtsp',
})
formats.extend([rtmp_format, rtsp_format])
else:
for protocol in ('rtmp', 'rtsp'):
if protocol not in skip_protocols:
formats.append({
'url': protocol + url_base,
'format_id': protocol,
'protocol': protocol,
})
return formats
def _live_title(self, name):
""" Generate the title for a live video """
now = datetime.datetime.now()
+5
View File
@@ -472,6 +472,10 @@ from .macgamestore import MacGameStoreIE
from .mailru import MailRuIE
from .makerschannel import MakersChannelIE
from .makertv import MakerTVIE
from .mangomolo import (
MangomoloVideoIE,
MangomoloLiveIE,
)
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .meta import METAIE
@@ -1065,6 +1069,7 @@ from .vporn import VpornIE
from .vrt import VRTIE
from .vube import VubeIE
from .vuclip import VuClipIE
from .vyborymos import VyboryMosIE
from .walla import WallaIE
from .washingtonpost import (
WashingtonPostIE,
+8 -8
View File
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import month_by_name
@@ -10,14 +9,14 @@ class FranceInterIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)'
_TEST = {
'url': 'https://www.franceinter.fr/emissions/la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013',
'md5': '4764932e466e6f6c79c317d2e74f6884',
'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016',
'md5': '9e54d7bdb6fdc02a841007f8a975c094',
'info_dict': {
'id': 'la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013',
'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016',
'ext': 'mp3',
'title': 'LHistoire dans les jeux vidéo du 18 décembre 2013 - France Inter',
'description': 'md5:7f2ce449894d1e585932273080fb410d',
'upload_date': '20131218',
'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
'description': 'md5:401969c5d318c061f86bda1fa359292b',
'upload_date': '20160907',
},
}
@@ -39,7 +38,8 @@ class FranceInterIE(InfoExtractor):
if upload_date_str:
upload_date_list = upload_date_str.split()
upload_date_list.reverse()
upload_date_list[1] = compat_str(month_by_name(upload_date_list[1], lang='fr'))
upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0)
upload_date_list[2] = '%02d' % int(upload_date_list[2])
upload_date = ''.join(upload_date_list)
else:
upload_date = None
+32 -1
View File
@@ -1657,7 +1657,9 @@ class GenericIE(InfoExtractor):
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'] = self._parse_mpd_formats(
doc, video_id, mpd_base_url=url.rpartition('/')[0])
doc, video_id,
mpd_base_url=full_response.geturl().rpartition('/')[0],
mpd_url=url)
self._sort_formats(info_dict['formats'])
return info_dict
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
@@ -2254,6 +2256,35 @@ class GenericIE(InfoExtractor):
return self.url_result(
self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
# Look for Mangomolo embeds
mobj = re.search(
r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
(?:
video\?.*?\bid=(?P<video_id>\d+)|
index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
).+?)\1''', webpage)
if mobj is not None:
info = {
'_type': 'url_transparent',
'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
'title': video_title,
'description': video_description,
'thumbnail': video_thumbnail,
'uploader': video_uploader,
}
video_id = mobj.group('video_id')
if video_id:
info.update({
'ie_key': 'MangomoloVideo',
'id': video_id,
})
else:
info.update({
'ie_key': 'MangomoloLive',
'id': mobj.group('channel_id'),
})
return info
# Look for Instagram embeds
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
if instagram_embed_url is not None:
+28 -11
View File
@@ -2,6 +2,7 @@
from __future__ import unicode_literals
import random
import re
import math
from .common import InfoExtractor
@@ -14,6 +15,7 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
orderedSet,
str_or_none,
)
@@ -63,6 +65,9 @@ class GloboIE(InfoExtractor):
}, {
'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html',
'only_matching': True,
}, {
'url': 'globo:3607726',
'only_matching': True,
}]
class MD5(object):
@@ -396,7 +401,7 @@ class GloboIE(InfoExtractor):
class GloboArticleIE(InfoExtractor):
_VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/]+)(?:\.html)?'
_VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?'
_VIDEOID_REGEXES = [
r'\bdata-video-id=["\'](\d{7,})',
@@ -408,15 +413,20 @@ class GloboArticleIE(InfoExtractor):
_TESTS = [{
'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html',
'md5': '307fdeae4390ccfe6ba1aa198cf6e72b',
'info_dict': {
'id': '3652183',
'ext': 'mp4',
'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de avião',
'duration': 110.711,
'uploader': 'Rede Globo',
'uploader_id': '196',
}
'id': 'novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes',
'title': 'Novidade na fiscalização de bagagem pela Receita provoca discussões',
'description': 'md5:c3c4b4d4c30c32fce460040b1ac46b12',
},
'playlist_count': 1,
}, {
'url': 'http://g1.globo.com/pr/parana/noticia/2016/09/mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato.html',
'info_dict': {
'id': 'mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato',
'title': "Lula era o 'comandante máximo' do esquema da Lava Jato, diz MPF",
'description': 'md5:8aa7cc8beda4dc71cc8553e00b77c54c',
},
'playlist_count': 6,
}, {
'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html',
'only_matching': True,
@@ -435,5 +445,12 @@ class GloboArticleIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(self._VIDEOID_REGEXES, webpage, 'video id')
return self.url_result('globo:%s' % video_id, 'Globo')
video_ids = []
for video_regex in self._VIDEOID_REGEXES:
video_ids.extend(re.findall(video_regex, webpage))
entries = [
self.url_result('globo:%s' % video_id, GloboIE.ie_key())
for video_id in orderedSet(video_ids)]
title = self._og_search_title(webpage, fatal=False)
description = self._html_search_meta('description', webpage)
return self.playlist_result(entries, display_id, title, description)
+4 -2
View File
@@ -9,6 +9,7 @@ from ..utils import (
determine_ext,
float_or_none,
int_or_none,
js_to_json,
mimetype2ext,
)
@@ -19,14 +20,15 @@ class JWPlatformBaseIE(InfoExtractor):
# TODO: Merge this with JWPlayer-related codes in generic.py
mobj = re.search(
'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\((?P<options>[^)]+)\)',
r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
webpage)
if mobj:
return mobj.group('options')
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
jwplayer_data = self._parse_json(
self._find_jwplayer_data(webpage), video_id)
self._find_jwplayer_data(webpage), video_id,
transform_source=js_to_json)
return self._parse_jwplayer_data(
jwplayer_data, video_id, *args, **kwargs)
+54
View File
@@ -0,0 +1,54 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..utils import (
int_or_none,
)
class MangomoloBaseIE(InfoExtractor):
def _get_real_id(self, page_id):
return page_id
def _real_extract(self, url):
page_id = self._get_real_id(self._match_id(url))
webpage = self._download_webpage(url, page_id)
hidden_inputs = self._hidden_inputs(webpage)
m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native'
format_url = self._html_search_regex(
[
r'file\s*:\s*"(https?://[^"]+?/playlist.m3u8)',
r'<a[^>]+href="(rtsp://[^"]+)"'
], webpage, 'format url')
formats = self._extract_wowza_formats(
format_url, page_id, m3u8_entry_protocol, ['smil'])
self._sort_formats(formats)
return {
'id': page_id,
'title': self._live_title(page_id) if self._IS_LIVE else page_id,
'uploader_id': hidden_inputs.get('userid'),
'duration': int_or_none(hidden_inputs.get('duration')),
'is_live': self._IS_LIVE,
'formats': formats,
}
class MangomoloVideoIE(MangomoloBaseIE):
IE_NAME = 'mangomolo:video'
_VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)'
_IS_LIVE = False
class MangomoloLiveIE(MangomoloBaseIE):
IE_NAME = 'mangomolo:live'
_VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
_IS_LIVE = True
def _get_real_id(self, page_id):
return base64.b64decode(compat_urllib_parse_unquote(page_id).encode()).decode()
+34 -21
View File
@@ -13,6 +13,7 @@ from ..utils import (
xpath_element,
ExtractorError,
determine_protocol,
unsmuggle_url,
)
@@ -35,28 +36,51 @@ class RadioCanadaIE(InfoExtractor):
}
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
app_code, video_id = re.match(self._VALID_URL, url).groups()
device_types = ['ipad', 'android']
metadata = self._download_xml(
'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
video_id, note='Downloading metadata XML', query={
'appCode': app_code,
'idMedia': video_id,
})
def get_meta(name):
el = find_xpath_attr(metadata, './/Meta', 'name', name)
return el.text if el is not None else None
if get_meta('protectionType'):
raise ExtractorError('This video is DRM protected.', expected=True)
device_types = ['ipad']
if app_code != 'toutv':
device_types.append('flash')
if not smuggled_data:
device_types.append('android')
formats = []
# TODO: extract f4m formats
# f4m formats can be extracted using flashhd device_type but they produce unplayable file
for device_type in device_types:
v_data = self._download_xml(
'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx',
video_id, note='Downloading %s XML' % device_type, query={
'appCode': app_code,
'idMedia': video_id,
'connectionType': 'broadband',
'multibitrate': 'true',
'deviceType': device_type,
validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx'
query = {
'appCode': app_code,
'idMedia': video_id,
'connectionType': 'broadband',
'multibitrate': 'true',
'deviceType': device_type,
}
if smuggled_data:
validation_url = 'https://services.radio-canada.ca/media/validation/v2/'
query.update(smuggled_data)
else:
query.update({
# paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
'paysJ391wsHjbOJwvCs26toz': 'CA',
'bypasslock': 'NZt5K62gRqfc',
}, fatal=False)
})
v_data = self._download_xml(validation_url, video_id, note='Downloading %s XML' % device_type, query=query, fatal=False)
v_url = xpath_text(v_data, 'url')
if not v_url:
continue
@@ -101,17 +125,6 @@ class RadioCanadaIE(InfoExtractor):
f4m_id='hds', fatal=False))
self._sort_formats(formats)
metadata = self._download_xml(
'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
video_id, note='Downloading metadata XML', query={
'appCode': app_code,
'idMedia': video_id,
})
def get_meta(name):
el = find_xpath_attr(metadata, './/Meta', 'name', name)
return el.text if el is not None else None
return {
'id': video_id,
'title': get_meta('Title'),
+1 -1
View File
@@ -16,7 +16,7 @@ class SVTBaseIE(InfoExtractor):
def _extract_video(self, video_info, video_id):
formats = []
for vr in video_info['videoReferences']:
player_type = vr.get('playerType')
player_type = vr.get('playerType') or vr.get('format')
vurl = vr['url']
ext = determine_ext(vurl)
if ext == 'm3u8':
+28 -11
View File
@@ -3,13 +3,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import determine_ext
from .jwplatform import JWPlatformBaseIE
class ThisAVIE(InfoExtractor):
class ThisAVIE(JWPlatformBaseIE):
_VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
_TEST = {
_TESTS = [{
'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
'md5': '0480f1ef3932d901f0e0e719f188f19b',
'info_dict': {
@@ -19,7 +18,17 @@ class ThisAVIE(InfoExtractor):
'uploader': 'dj7970',
'uploader_id': 'dj7970'
}
}
}, {
'url': 'http://www.thisav.com/video/242352/nerdy-18yo-big-ass-tattoos-and-glasses.html',
'md5': 'ba90c076bd0f80203679e5b60bf523ee',
'info_dict': {
'id': '242352',
'ext': 'mp4',
'title': 'Nerdy 18yo Big Ass Tattoos and Glasses',
'uploader': 'cybersluts',
'uploader_id': 'cybersluts',
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -28,20 +37,28 @@ class ThisAVIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title')
video_url = self._html_search_regex(
r"addVariable\('file','([^']+)'\);", webpage, 'video url')
r"addVariable\('file','([^']+)'\);", webpage, 'video url', default=None)
if video_url:
info_dict = {
'formats': [{
'url': video_url,
}],
}
else:
info_dict = self._extract_jwplayer_data(
webpage, video_id, require_title=False)
uploader = self._html_search_regex(
r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
webpage, 'uploader name', fatal=False)
uploader_id = self._html_search_regex(
r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
webpage, 'uploader id', fatal=False)
ext = determine_ext(video_url)
return {
info_dict.update({
'id': video_id,
'url': video_url,
'uploader': uploader,
'uploader_id': uploader_id,
'title': title,
'ext': ext,
}
})
return info_dict
+58 -2
View File
@@ -2,12 +2,22 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import int_or_none
from ..utils import (
int_or_none,
js_to_json,
ExtractorError,
urlencode_postdata,
extract_attributes,
smuggle_url,
)
class TouTvIE(InfoExtractor):
_NETRC_MACHINE = 'toutv'
IE_NAME = 'tou.tv'
_VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+/S[0-9]+E[0-9]+)'
_access_token = None
_claims = None
_TEST = {
'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
@@ -22,18 +32,64 @@ class TouTvIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
'skip': '404 Not Found',
}
def _real_initialize(self):
email, password = self._get_login_info()
if email is None:
return
state = 'http://ici.tou.tv//'
webpage = self._download_webpage(state, None, 'Downloading homepage')
toutvlogin = self._parse_json(self._search_regex(
r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json)
authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize'
login_webpage = self._download_webpage(
authorize_url, None, 'Downloading login page', query={
'client_id': toutvlogin['clientId'],
'redirect_uri': 'https://ici.tou.tv/login/loginCallback',
'response_type': 'token',
'scope': 'media-drmt openid profile email id.write media-validation.read.privileged',
'state': state,
})
login_form = self._search_regex(
r'(?s)(<form[^>]+id="Form-login".+?</form>)', login_webpage, 'login form')
form_data = self._hidden_inputs(login_form)
form_data.update({
'login-email': email,
'login-password': password,
})
post_url = extract_attributes(login_form).get('action') or authorize_url
_, urlh = self._download_webpage_handle(
post_url, None, 'Logging in', data=urlencode_postdata(form_data))
self._access_token = self._search_regex(
r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
urlh.geturl(), 'access token')
self._claims = self._download_json(
'https://services.radio-canada.ca/media/validation/v2/getClaims',
None, 'Extracting Claims', query={
'token': self._access_token,
'access_token': self._access_token,
})['claims']
def _real_extract(self, url):
path = self._match_id(url)
metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path)
if metadata.get('IsDrm'):
raise ExtractorError('This video is DRM protected.', expected=True)
video_id = metadata['IdMedia']
details = metadata['Details']
title = details['OriginalTitle']
video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id)
if self._access_token and self._claims:
video_url = smuggle_url(video_url, {
'access_token': self._access_token,
'claims': self._claims,
})
return {
'_type': 'url_transparent',
'url': 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id),
'url': video_url,
'id': video_id,
'title': title,
'thumbnail': details.get('ImageUrl'),
+32 -17
View File
@@ -2,9 +2,13 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
try_get,
update_url_query,
)
@@ -65,36 +69,47 @@ class TV4IE(InfoExtractor):
video_id = self._match_id(url)
info = self._download_json(
'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON')
'http://www.tv4play.se/player/assets/%s.json' % video_id,
video_id, 'Downloading video info JSON')
# If is_geo_restricted is true, it doesn't necessarily mean we can't download it
if info['is_geo_restricted']:
if info.get('is_geo_restricted'):
self.report_warning('This content might not be available in your country due to licensing restrictions.')
if info['requires_subscription']:
if info.get('requires_subscription'):
raise ExtractorError('This content requires subscription.', expected=True)
sources_data = self._download_json(
'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id, video_id, 'Downloading sources JSON')
sources = sources_data['playback']
title = info['title']
formats = []
for item in sources.get('items', {}).get('item', []):
ext, bitrate = item['mediaFormat'], item['bitrate']
formats.append({
'format_id': '%s_%s' % (ext, bitrate),
'tbr': bitrate,
'ext': ext,
'url': item['url'],
})
# http formats are linked with unresolvable host
for kind in ('hls', ''):
data = self._download_json(
'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id,
video_id, 'Downloading sources JSON', query={
'protocol': kind,
'videoFormat': 'MP4+WEBVTTS+WEBVTT',
})
item = try_get(data, lambda x: x['playback']['items']['item'], dict)
manifest_url = item.get('url')
if not isinstance(manifest_url, compat_str):
continue
if kind == 'hls':
formats.extend(self._extract_m3u8_formats(
manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=kind, fatal=False))
else:
formats.extend(self._extract_f4m_formats(
update_url_query(manifest_url, {'hdcore': '3.8.0'}),
video_id, f4m_id='hds', fatal=False))
self._sort_formats(formats)
return {
'id': video_id,
'title': info['title'],
'title': title,
'formats': formats,
'description': info.get('description'),
'timestamp': parse_iso8601(info.get('broadcast_date_time')),
'duration': info.get('duration'),
'duration': int_or_none(info.get('duration')),
'thumbnail': info.get('image'),
'is_live': sources.get('live'),
'is_live': info.get('is_live') is True,
}
+2 -2
View File
@@ -48,8 +48,8 @@ class VierIE(InfoExtractor):
[r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
webpage, 'filename')
playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)
formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')
playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename)
formats = self._extract_wowza_formats(playlist_url, display_id)
self._sort_formats(formats)
title = self._og_search_title(webpage, default=display_id)
+2 -23
View File
@@ -25,29 +25,8 @@ class VODPlatformIE(InfoExtractor):
title = unescapeHTML(self._og_search_title(webpage))
hidden_inputs = self._hidden_inputs(webpage)
base_url = self._search_regex(
'(.*/)(?:playlist.m3u8|manifest.mpd)',
hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'],
'base url')
formats = self._extract_m3u8_formats(
base_url + 'playlist.m3u8', video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False)
formats.extend(self._extract_mpd_formats(
base_url + 'manifest.mpd', video_id,
mpd_id='dash', fatal=False))
rtmp_formats = self._extract_smil_formats(
base_url + 'jwplayer.smil', video_id, fatal=False)
for rtmp_format in rtmp_formats:
rtsp_format = rtmp_format.copy()
rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
del rtsp_format['play_path']
del rtsp_format['ext']
rtsp_format.update({
'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
'protocol': 'rtsp',
})
formats.extend([rtmp_format, rtsp_format])
formats = self._extract_wowza_formats(
hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], video_id, skip_protocols=['f4m', 'smil'])
self._sort_formats(formats)
return {
+8 -32
View File
@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
float_or_none,
)
@@ -75,7 +74,6 @@ class VRTIE(InfoExtractor):
},
{
'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055',
'md5': '',
'info_dict': {
'id': '2377055',
'ext': 'mp4',
@@ -119,39 +117,17 @@ class VRTIE(InfoExtractor):
video_id, 'mp4', m3u8_id='hls', fatal=False))
if src:
if determine_ext(src) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
src.replace('playlist.m3u8', 'manifest.f4m'),
video_id, f4m_id='hds', fatal=False))
if 'data-video-geoblocking="true"' not in webpage:
rtmp_formats = self._extract_smil_formats(
src.replace('playlist.m3u8', 'jwplayer.smil'),
video_id, fatal=False)
formats.extend(rtmp_formats)
for rtmp_format in rtmp_formats:
rtmp_format_c = rtmp_format.copy()
rtmp_format_c['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
del rtmp_format_c['play_path']
del rtmp_format_c['ext']
http_format = rtmp_format_c.copy()
formats = self._extract_wowza_formats(src, video_id)
if 'data-video-geoblocking="true"' not in webpage:
for f in formats:
if f['url'].startswith('rtsp://'):
http_format = f.copy()
http_format.update({
'url': rtmp_format_c['url'].replace('rtmp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''),
'format_id': rtmp_format['format_id'].replace('rtmp', 'http'),
'url': f['url'].replace('rtsp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''),
'format_id': f['format_id'].replace('rtsp', 'http'),
'protocol': 'http',
})
rtsp_format = rtmp_format_c.copy()
rtsp_format.update({
'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
'protocol': 'rtsp',
})
formats.extend([http_format, rtsp_format])
else:
formats.extend(self._extract_f4m_formats(
'%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False))
formats.append(http_format)
if not formats and 'data-video-geoblocking="true"' in webpage:
self.raise_geo_restricted('This video is only available in Belgium')
+55
View File
@@ -0,0 +1,55 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class VyboryMosIE(InfoExtractor):
_VALID_URL = r'https?://vybory\.mos\.ru/(?:#precinct/|account/channels\?.*?\bstation_id=)(?P<id>\d+)'
_TESTS = [{
'url': 'http://vybory.mos.ru/#precinct/13636',
'info_dict': {
'id': '13636',
'ext': 'mp4',
'title': 're:^Участковая избирательная комиссия №2231 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'Россия, Москва, улица Введенского, 32А',
'is_live': True,
},
'params': {
'skip_download': True,
}
}, {
'url': 'http://vybory.mos.ru/account/channels?station_id=13636',
'only_matching': True,
}]
def _real_extract(self, url):
station_id = self._match_id(url)
channels = self._download_json(
'http://vybory.mos.ru/account/channels?station_id=%s' % station_id,
station_id)
formats = []
for cam_num, (sid, hosts, name, _) in enumerate(channels, 1):
for num, host in enumerate(hosts, 1):
formats.append({
'url': 'http://%s/master.m3u8?sid=%s' % (host, sid),
'ext': 'mp4',
'format_id': 'camera%d-host%d' % (cam_num, num),
'format_note': '%s, %s' % (name, host),
})
info = self._download_json(
'http://vybory.mos.ru/json/voting_stations/136/%s.json' % station_id,
station_id, 'Downloading station info')
title = info['name']
return {
'id': station_id,
'title': self._live_title(title),
'description': info.get('address'),
'is_live': True,
'formats': formats,
}
+5 -3
View File
@@ -124,12 +124,14 @@ class XFileShareIE(InfoExtractor):
webpage = self._download_webpage(req, video_id, 'Downloading video page')
title = (self._search_regex(
[r'style="z-index: [0-9]+;">([^<]+)</span>',
(r'style="z-index: [0-9]+;">([^<]+)</span>',
r'<td nowrap>([^<]+)</td>',
r'h4-fine[^>]*>([^<]+)<',
r'>Watch (.+) ',
r'<h2 class="video-page-head">([^<]+)</h2>'],
webpage, 'title', default=None) or self._og_search_title(webpage)).strip()
r'<h2 class="video-page-head">([^<]+)</h2>',
r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<'), # streamin.to
webpage, 'title', default=None) or self._og_search_title(
webpage, default=None) or video_id).strip()
def extract_video_url(default=NO_DEFAULT):
return self._search_regex(
+31 -32
View File
@@ -139,6 +139,30 @@ class FFmpegPostProcessor(PostProcessor):
def probe_executable(self):
return self._paths[self.probe_basename]
def get_audio_codec(self, path):
if not self.probe_available:
raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
try:
cmd = [
encodeFilename(self.probe_executable, True),
encodeArgument('-show_streams'),
encodeFilename(self._ffmpeg_filename_argument(path), True)]
if self._downloader.params.get('verbose', False):
self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
output = handle.communicate()[0]
if handle.wait() != 0:
return None
except (IOError, OSError):
return None
audio_codec = None
for line in output.decode('ascii', 'ignore').split('\n'):
if line.startswith('codec_name='):
audio_codec = line.split('=')[1].strip()
elif line.strip() == 'codec_type=audio' and audio_codec is not None:
return audio_codec
return None
def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
self.check_version()
@@ -188,31 +212,6 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
self._preferredquality = preferredquality
self._nopostoverwrites = nopostoverwrites
def get_audio_codec(self, path):
if not self.probe_available:
raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
try:
cmd = [
encodeFilename(self.probe_executable, True),
encodeArgument('-show_streams'),
encodeFilename(self._ffmpeg_filename_argument(path), True)]
if self._downloader.params.get('verbose', False):
self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
output = handle.communicate()[0]
if handle.wait() != 0:
return None
except (IOError, OSError):
return None
audio_codec = None
for line in output.decode('ascii', 'ignore').split('\n'):
if line.startswith('codec_name='):
audio_codec = line.split('=')[1].strip()
elif line.strip() == 'codec_type=audio' and audio_codec is not None:
return audio_codec
return None
def run_ffmpeg(self, path, out_path, codec, more_opts):
if codec is None:
acodec_opts = []
@@ -504,15 +503,15 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor):
class FFmpegFixupM3u8PP(FFmpegPostProcessor):
def run(self, info):
filename = info['filepath']
temp_filename = prepend_extension(filename, 'temp')
if self.get_audio_codec(filename) == 'aac':
temp_filename = prepend_extension(filename, 'temp')
options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename)
self.run_ffmpeg(filename, temp_filename, options)
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename)
self.run_ffmpeg(filename, temp_filename, options)
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
return [], info
+1 -1
View File
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2016.09.15'
__version__ = '2016.09.18'