1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2026-05-10 23:53:44 +00:00

Compare commits

...

13 Commits

Author SHA1 Message Date
Sergey M․ 0e94b4713d release 2016.07.06 2016-07-06 00:54:23 +07:00
Sergey M․ a6d3b89feb [prosiebensat1] Make downloading urls JSON non fatal 2016-07-06 00:52:48 +07:00
Remita Amine 6c26815d63 [onionstudios] fix info extraction 2016-07-05 18:05:07 +01:00
Sergey M․ 73c4ac2c95 [youtube:channel] Improve channel id extraction and detect unavailable channels (Closes #10009) 2016-07-05 23:30:44 +07:00
Remita Amine 84f214d840 [prosiebensat1] extract all formats 2016-07-05 17:11:45 +01:00
Remita Amine e3f88be7a9 [rtvnh] extract all formats 2016-07-05 14:45:39 +01:00
Remita Amine 31af3e35e0 [sandia] remove unused imports 2016-07-05 13:39:24 +01:00
Remita Amine 94a5cff91d [sendia] fix info extraction 2016-07-05 13:37:46 +01:00
Remita Amine 77082c7b9e [slideshare] fix description extraction 2016-07-05 12:01:04 +01:00
Remita Amine 252a1f75d2 [spiegel] improve info extraction 2016-07-05 11:46:25 +01:00
Remita Amine 5abf513cf8 [stitcher] fix episode config extraction 2016-07-05 10:44:16 +01:00
Yen Chi Hsuan c6054e3201 [xuite] Support videos with already encoded media id 2016-07-05 14:26:42 +08:00
Yen Chi Hsuan 4080530624 [youtube:shared] Recognize the new 'shared' URLs
Closes #10007
2016-07-05 13:15:05 +08:00
13 changed files with 267 additions and 220 deletions
+3 -3
View File
@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.05*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.05**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.06*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.06**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.07.05
[debug] youtube-dl version 2016.07.06
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
+1
View File
@@ -857,6 +857,7 @@
- **youtube:search**: YouTube.com searches
- **youtube:search:date**: YouTube.com searches, newest videos first
- **youtube:search_url**: YouTube.com search URLs
- **youtube:shared**
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
+1
View File
@@ -1066,6 +1066,7 @@ from .youtube import (
YoutubeSearchDateIE,
YoutubeSearchIE,
YoutubeSearchURLIE,
YoutubeSharedVideoIE,
YoutubeShowIE,
YoutubeSubscriptionsIE,
YoutubeTruncatedIDIE,
+25 -36
View File
@@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
float_or_none,
)
@@ -15,15 +16,14 @@ class OnionStudiosIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
'md5': 'd4851405d31adfadf71cd7a487b765bb',
'md5': 'e49f947c105b8a78a675a0ee1bddedfe',
'info_dict': {
'id': '2937',
'ext': 'mp4',
'title': 'Hannibal charges forward, stops for a cocktail',
'description': 'md5:e786add7f280b7f0fe237b64cc73df76',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'The A.V. Club',
'uploader_id': 'TheAVClub',
'uploader_id': 'the-av-club',
},
}, {
'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
@@ -40,50 +40,39 @@ class OnionStudiosIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://www.onionstudios.com/embed?id=%s' % video_id, video_id)
video_data = self._download_json(
'http://www.onionstudios.com/video/%s.json' % video_id, video_id)
title = video_data['title']
formats = []
for src in re.findall(r'<source[^>]+src="([^"]+)"', webpage):
ext = determine_ext(src)
if ext == 'm3u8':
for source in video_data.get('sources', []):
source_url = source.get('url')
if not source_url:
continue
content_type = source.get('content_type')
ext = determine_ext(source_url)
if content_type == 'application/x-mpegURL' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
source_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
else:
height = int_or_none(self._search_regex(
r'/(\d+)\.%s' % ext, src, 'height', default=None))
tbr = int_or_none(source.get('bitrate'))
formats.append({
'format_id': ext + ('-%sp' % height if height else ''),
'url': src,
'height': height,
'format_id': ext + ('-%d' % tbr if tbr else ''),
'url': source_url,
'width': int_or_none(source.get('width')),
'tbr': tbr,
'ext': ext,
'preference': 1,
})
self._sort_formats(formats)
title = self._search_regex(
r'share_title\s*=\s*(["\'])(?P<title>[^\1]+?)\1',
webpage, 'title', group='title')
description = self._search_regex(
r'share_description\s*=\s*(["\'])(?P<description>[^\'"]+?)\1',
webpage, 'description', default=None, group='description')
thumbnail = self._search_regex(
r'poster\s*=\s*(["\'])(?P<thumbnail>[^\1]+?)\1',
webpage, 'thumbnail', default=False, group='thumbnail')
uploader_id = self._search_regex(
r'twitter_handle\s*=\s*(["\'])(?P<uploader_id>[^\1]+?)\1',
webpage, 'uploader id', fatal=False, group='uploader_id')
uploader = self._search_regex(
r'window\.channelName\s*=\s*(["\'])Embedded:(?P<uploader>[^\1]+?)\1',
webpage, 'uploader', default=False, group='uploader')
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'uploader_id': uploader_id,
'thumbnail': video_data.get('poster_url'),
'uploader': video_data.get('channel_name'),
'uploader_id': video_data.get('channel_slug'),
'duration': float_or_none(video_data.get('duration', 1000)),
'tags': video_data.get('tags'),
'formats': formats,
}
+90 -77
View File
@@ -5,7 +5,7 @@ import re
from hashlib import sha1
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode
from ..compat import compat_str
from ..utils import (
ExtractorError,
determine_ext,
@@ -71,6 +71,7 @@ class ProSiebenSat1IE(InfoExtractor):
# rtmp download
'skip_download': True,
},
'skip': 'This video is unavailable',
},
{
'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip',
@@ -86,6 +87,7 @@ class ProSiebenSat1IE(InfoExtractor):
# rtmp download
'skip_download': True,
},
'skip': 'This video is unavailable',
},
{
'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip',
@@ -101,6 +103,7 @@ class ProSiebenSat1IE(InfoExtractor):
# rtmp download
'skip_download': True,
},
'skip': 'This video is unavailable',
},
{
'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge',
@@ -116,6 +119,7 @@ class ProSiebenSat1IE(InfoExtractor):
# rtmp download
'skip_download': True,
},
'skip': 'This video is unavailable',
},
{
'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge',
@@ -131,6 +135,7 @@ class ProSiebenSat1IE(InfoExtractor):
# rtmp download
'skip_download': True,
},
'skip': 'This video is unavailable',
},
{
'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
@@ -227,70 +232,42 @@ class ProSiebenSat1IE(InfoExtractor):
]
def _extract_clip(self, url, webpage):
clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
clip_id = self._html_search_regex(
self._CLIPID_REGEXES, webpage, 'clip id')
access_token = 'prosieben'
client_name = 'kolibri-2.0.19-splec4'
client_location = url
videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse_urlencode({
'access_token': access_token,
'client_location': client_location,
'client_name': client_name,
'ids': clip_id,
})
video = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON')[0]
video = self._download_json(
'http://vas.sim-technik.de/vas/live/v2/videos',
clip_id, 'Downloading videos JSON', query={
'access_token': access_token,
'client_location': client_location,
'client_name': client_name,
'ids': clip_id,
})[0]
if video.get('is_protected') is True:
raise ExtractorError('This video is DRM protected.', expected=True)
duration = float_or_none(video.get('duration'))
source_ids = [source['id'] for source in video['sources']]
source_ids_str = ','.join(map(str, source_ids))
source_ids = [compat_str(source['id']) for source in video['sources']]
g = '01!8d8F_)r9]4s[qeuXfP%'
client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name]).encode('utf-8')).hexdigest()
client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name])
.encode('utf-8')).hexdigest()
sources_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources?%s' % (clip_id, compat_urllib_parse_urlencode({
'access_token': access_token,
'client_id': client_id,
'client_location': client_location,
'client_name': client_name,
}))
sources = self._download_json(sources_api_url, clip_id, 'Downloading sources JSON')
sources = self._download_json(
'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
clip_id, 'Downloading sources JSON', query={
'access_token': access_token,
'client_id': client_id,
'client_location': client_location,
'client_name': client_name,
})
server_id = sources['server_id']
client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id,
client_location, source_ids_str, g, client_name])
.encode('utf-8')).hexdigest()
url_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url?%s' % (clip_id, compat_urllib_parse_urlencode({
'access_token': access_token,
'client_id': client_id,
'client_location': client_location,
'client_name': client_name,
'server_id': server_id,
'source_ids': source_ids_str,
}))
urls = self._download_json(url_api_url, clip_id, 'Downloading urls JSON')
title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title')
description = self._html_search_regex(self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._html_search_regex(
self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
formats = []
urls_sources = urls['sources']
if isinstance(urls_sources, dict):
urls_sources = urls_sources.values()
def fix_bitrate(bitrate):
bitrate = int_or_none(bitrate)
@@ -298,37 +275,73 @@ class ProSiebenSat1IE(InfoExtractor):
return None
return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
for source in urls_sources:
protocol = source['protocol']
source_url = source['url']
if protocol == 'rtmp' or protocol == 'rtmpe':
mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
if not mobj:
formats = []
for source_id in source_ids:
client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id, client_location, source_id, g, client_name]).encode('utf-8')).hexdigest()
urls = self._download_json(
'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
clip_id, 'Downloading urls JSON', fatal=False, query={
'access_token': access_token,
'client_id': client_id,
'client_location': client_location,
'client_name': client_name,
'server_id': server_id,
'source_ids': source_id,
})
if not urls:
continue
if urls.get('status_code') != 0:
raise ExtractorError('This video is unavailable', expected=True)
urls_sources = urls['sources']
if isinstance(urls_sources, dict):
urls_sources = urls_sources.values()
for source in urls_sources:
source_url = source.get('url')
if not source_url:
continue
path = mobj.group('path')
mp4colon_index = path.rfind('mp4:')
app = path[:mp4colon_index]
play_path = path[mp4colon_index:]
formats.append({
'url': '%s/%s' % (mobj.group('url'), app),
'app': app,
'play_path': play_path,
'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
'page_url': 'http://www.prosieben.de',
'vbr': fix_bitrate(source['bitrate']),
'ext': 'mp4',
'format_id': '%s_%s' % (source['cdn'], source['bitrate']),
})
elif 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
formats.extend(self._extract_f4m_formats(source_url, clip_id))
else:
formats.append({
'url': source_url,
'vbr': fix_bitrate(source['bitrate']),
})
protocol = source.get('protocol')
mimetype = source.get('mimetype')
if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
formats.extend(self._extract_f4m_formats(
source_url, clip_id, f4m_id='hds', fatal=False))
elif mimetype == 'application/x-mpegURL':
formats.extend(self._extract_m3u8_formats(
source_url, clip_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
else:
tbr = fix_bitrate(source['bitrate'])
if protocol in ('rtmp', 'rtmpe'):
mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
if not mobj:
continue
path = mobj.group('path')
mp4colon_index = path.rfind('mp4:')
app = path[:mp4colon_index]
play_path = path[mp4colon_index:]
formats.append({
'url': '%s/%s' % (mobj.group('url'), app),
'app': app,
'play_path': play_path,
'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
'page_url': 'http://www.prosieben.de',
'tbr': tbr,
'ext': 'flv',
'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
})
else:
formats.append({
'url': source_url,
'tbr': tbr,
'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
})
self._sort_formats(formats)
description = self._html_search_regex(
self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._html_search_regex(
self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
return {
'id': clip_id,
'title': title,
+23 -9
View File
@@ -9,7 +9,7 @@ class RTVNHIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.rtvnh.nl/video/131946',
'md5': '6e1d0ab079e2a00b6161442d3ceacfc1',
'md5': 'cdbec9f44550763c8afc96050fa747dc',
'info_dict': {
'id': '131946',
'ext': 'mp4',
@@ -29,15 +29,29 @@ class RTVNHIE(InfoExtractor):
raise ExtractorError(
'%s returned error code %d' % (self.IE_NAME, status), expected=True)
formats = self._extract_smil_formats(
'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id, fatal=False)
formats = []
rtmp_formats = self._extract_smil_formats(
'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id)
formats.extend(rtmp_formats)
for item in meta['source']['fb']:
if item.get('type') == 'hls':
formats.extend(self._extract_m3u8_formats(
item['file'], video_id, ext='mp4', entry_protocol='m3u8_native'))
elif item.get('type') == '':
formats.append({'url': item['file']})
for rtmp_format in rtmp_formats:
rtmp_url = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
rtsp_format = rtmp_format.copy()
del rtsp_format['play_path']
del rtsp_format['ext']
rtsp_format.update({
'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
'url': rtmp_url.replace('rtmp://', 'rtsp://'),
'protocol': 'rtsp',
})
formats.append(rtsp_format)
http_base_url = rtmp_url.replace('rtmp://', 'http://')
formats.extend(self._extract_m3u8_formats(
http_base_url + '/playlist.m3u8', video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
http_base_url + '/manifest.f4m',
video_id, f4m_id='hds', fatal=False))
self._sort_formats(formats)
return {
+25 -75
View File
@@ -1,18 +1,12 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
import json
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
js_to_json,
mimetype2ext,
sanitized_Request,
unified_strdate,
)
@@ -27,7 +21,8 @@ class SandiaIE(InfoExtractor):
'ext': 'mp4',
'title': 'Xyce Software Training - Section 1',
'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}',
'upload_date': '20120904',
'upload_date': '20120409',
'timestamp': 1333983600,
'duration': 7794,
}
}
@@ -35,81 +30,36 @@ class SandiaIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
req = sanitized_Request(url)
req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4')
webpage = self._download_webpage(req, video_id)
presentation_data = self._download_json(
'http://digitalops.sandia.gov/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
video_id, data=json.dumps({
'getPlayerOptionsRequest': {
'ResourceId': video_id,
'QueryString': '',
}
}), headers={
'Content-Type': 'application/json; charset=utf-8',
})['d']['Presentation']
js_path = self._search_regex(
r'<script type="text/javascript" src="(/Mediasite/FileServer/Presentation/[^"]+)"',
webpage, 'JS code URL')
js_url = compat_urlparse.urljoin(url, js_path)
js_code = self._download_webpage(
js_url, video_id, note='Downloading player')
def extract_str(key, **args):
return self._search_regex(
r'Mediasite\.PlaybackManifest\.%s\s*=\s*(.+);\s*?\n' % re.escape(key),
js_code, key, **args)
def extract_data(key, **args):
data_json = extract_str(key, **args)
if data_json is None:
return data_json
return self._parse_json(
data_json, video_id, transform_source=js_to_json)
title = presentation_data['Title']
formats = []
for i in itertools.count():
fd = extract_data('VideoUrls[%d]' % i, default=None)
if fd is None:
break
formats.append({
'format_id': '%s' % i,
'format_note': fd['MimeType'].partition('/')[2],
'ext': mimetype2ext(fd['MimeType']),
'url': fd['Location'],
'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None,
})
for stream in presentation_data.get('Streams', []):
for fd in stream.get('VideoUrls', []):
formats.append({
'format_id': fd['MediaType'],
'format_note': fd['MimeType'].partition('/')[2],
'ext': mimetype2ext(fd['MimeType']),
'url': fd['Location'],
'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None,
})
self._sort_formats(formats)
slide_baseurl = compat_urlparse.urljoin(
url, extract_data('SlideBaseUrl'))
slide_template = slide_baseurl + re.sub(
r'\{0:D?([0-9+])\}', r'%0\1d', extract_data('SlideImageFileNameTemplate'))
slides = []
last_slide_time = 0
for i in itertools.count(1):
sd = extract_str('Slides[%d]' % i, default=None)
if sd is None:
break
timestamp = int_or_none(self._search_regex(
r'^Mediasite\.PlaybackManifest\.CreateSlide\("[^"]*"\s*,\s*([0-9]+),',
sd, 'slide %s timestamp' % i, fatal=False))
slides.append({
'url': slide_template % i,
'duration': timestamp - last_slide_time,
})
last_slide_time = timestamp
formats.append({
'format_id': 'slides',
'protocol': 'slideshow',
'url': json.dumps(slides),
'preference': -10000, # Downloader not yet written
})
self._sort_formats(formats)
title = extract_data('Title')
description = extract_data('Description', fatal=False)
duration = int_or_none(extract_data(
'Duration', fatal=False), scale=1000)
upload_date = unified_strdate(extract_data('AirDate', fatal=False))
return {
'id': video_id,
'title': title,
'description': description,
'description': presentation_data.get('Description'),
'formats': formats,
'upload_date': upload_date,
'duration': duration,
'timestamp': int_or_none(presentation_data.get('UnixTime'), 1000),
'duration': int_or_none(presentation_data.get('Duration'), 1000),
}
+3 -2
View File
@@ -9,6 +9,7 @@ from ..compat import (
)
from ..utils import (
ExtractorError,
get_element_by_id,
)
@@ -40,7 +41,7 @@ class SlideshareIE(InfoExtractor):
bucket = info['jsplayer']['video_bucket']
ext = info['jsplayer']['video_extension']
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
description = self._html_search_regex(
description = get_element_by_id('slideshow-description-paragraph', webpage) or self._html_search_regex(
r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage,
'description', fatal=False)
@@ -51,5 +52,5 @@ class SlideshareIE(InfoExtractor):
'ext': ext,
'url': video_url,
'thumbnail': info['slideshow']['pin_image_url'],
'description': description,
'description': description.strip() if description else None,
}
+15 -6
View File
@@ -4,8 +4,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from .spiegeltv import SpiegeltvIE
from ..compat import compat_urlparse
from ..utils import (
extract_attributes,
unified_strdate,
get_element_by_attribute,
)
class SpiegelIE(InfoExtractor):
@@ -19,6 +24,7 @@ class SpiegelIE(InfoExtractor):
'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
'description': 'md5:8029d8310232196eb235d27575a8b9f4',
'duration': 49,
'upload_date': '20130311',
},
}, {
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
@@ -29,6 +35,7 @@ class SpiegelIE(InfoExtractor):
'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
'duration': 983,
'upload_date': '20131115',
},
}, {
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
@@ -38,6 +45,7 @@ class SpiegelIE(InfoExtractor):
'ext': 'mp4',
'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
'upload_date': '20140904',
}
}, {
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html',
@@ -52,10 +60,10 @@ class SpiegelIE(InfoExtractor):
if SpiegeltvIE.suitable(handle.geturl()):
return self.url_result(handle.geturl(), 'Spiegeltv')
title = re.sub(r'\s+', ' ', self._html_search_regex(
r'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)</(?:h1|div)>',
webpage, 'title'))
description = self._html_search_meta('description', webpage, 'description')
video_data = extract_attributes(self._search_regex(r'(<div[^>]+id="spVideoElements"[^>]+>)', webpage, 'video element', default=''))
title = video_data.get('data-video-title') or get_element_by_attribute('class', 'module-title', webpage)
description = video_data.get('data-video-teaser') or self._html_search_meta('description', webpage, 'description')
base_url = self._search_regex(
[r'server\s*:\s*(["\'])(?P<url>.+?)\1', r'var\s+server\s*=\s*"(?P<url>[^"]+)\"'],
@@ -87,8 +95,9 @@ class SpiegelIE(InfoExtractor):
return {
'id': video_id,
'title': title,
'description': description,
'description': description.strip() if description else None,
'duration': duration,
'upload_date': unified_strdate(video_data.get('data-video-date')),
'formats': formats,
}
+1 -1
View File
@@ -56,7 +56,7 @@ class StitcherIE(InfoExtractor):
episode = self._parse_json(
js_to_json(self._search_regex(
r'(?s)var\s+stitcher\s*=\s*({.+?});\n', webpage, 'episode config')),
r'(?s)var\s+stitcher(?:Config)?\s*=\s*({.+?});\n', webpage, 'episode config')),
display_id)['config']['episode']
title = unescapeHTML(episode['title'])
+24 -6
View File
@@ -67,6 +67,20 @@ class XuiteIE(InfoExtractor):
'categories': ['電玩動漫'],
},
'skip': 'Video removed',
}, {
# Video with encoded media id
# from http://forgetfulbc.blogspot.com/2016/06/date.html
'url': 'http://vlog.xuite.net/embed/cE1xbENoLTI3NDQ3MzM2LmZsdg==?ar=0&as=0',
'info_dict': {
'id': 'cE1xbENoLTI3NDQ3MzM2LmZsdg==',
'ext': 'mp4',
'title': '男女平權只是口號?專家解釋約會時男生是否該幫女生付錢 (中字)',
'description': 'md5:f0abdcb69df300f522a5442ef3146f2a',
'timestamp': 1466160960,
'upload_date': '20160617',
'uploader': 'B.C. & Lowy',
'uploader_id': '232279340',
},
}, {
'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9',
'only_matching': True,
@@ -80,10 +94,9 @@ class XuiteIE(InfoExtractor):
def base64_encode_utf8(data):
return base64.b64encode(data.encode('utf-8')).decode('utf-8')
def _extract_flv_config(self, media_id):
base64_media_id = self.base64_encode_utf8(media_id)
def _extract_flv_config(self, encoded_media_id):
flv_config = self._download_xml(
'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,
'http://vlog.xuite.net/flash/player?media=%s' % encoded_media_id,
'flv config')
prop_dict = {}
for prop in flv_config.findall('./property'):
@@ -108,9 +121,14 @@ class XuiteIE(InfoExtractor):
'%s returned error: %s' % (self.IE_NAME, error_msg),
expected=True)
video_id = self._html_search_regex(
r'data-mediaid="(\d+)"', webpage, 'media id')
flv_config = self._extract_flv_config(video_id)
encoded_media_id = self._search_regex(
r'attributes\.name\s*=\s*"([^"]+)"', webpage,
'encoded media id', default=None)
if encoded_media_id is None:
video_id = self._html_search_regex(
r'data-mediaid="(\d+)"', webpage, 'media id')
encoded_media_id = self.base64_encode_utf8(video_id)
flv_config = self._extract_flv_config(encoded_media_id)
FORMATS = {
'audio': 'mp3',
+55 -4
View File
@@ -1730,6 +1730,39 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}
class YoutubeSharedVideoIE(InfoExtractor):
_VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?ci=(?P<id>[0-9A-Za-z_-]{11})'
IE_NAME = 'youtube:shared'
_TEST = {
'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
'info_dict': {
'id': 'uPDB5I9wfp8',
'ext': 'webm',
'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
'upload_date': '20160219',
'uploader': 'Pocoyo - Português (BR)',
'uploader_id': 'PocoyoBrazil',
},
'add_ie': ['Youtube'],
'params': {
# There are already too many Youtube downloads
'skip_download': True,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
real_video_id = self._html_search_meta(
'videoId', webpage, 'YouTube video id', fatal=True)
return self.url_result(real_video_id, YoutubeIE.ie_key())
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
IE_DESC = 'YouTube.com playlists'
_VALID_URL = r"""(?x)(?:
@@ -1962,9 +1995,13 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
channel_playlist_id = self._html_search_meta(
'channelId', channel_page, 'channel id', default=None)
if not channel_playlist_id:
channel_playlist_id = self._search_regex(
r'data-(?:channel-external-|yt)id="([^"]+)"',
channel_page, 'channel id', default=None)
channel_url = self._html_search_meta(
('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
channel_page, 'channel url', default=None)
if channel_url:
channel_playlist_id = self._search_regex(
r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
channel_url, 'channel id', default=None)
if channel_playlist_id and channel_playlist_id.startswith('UC'):
playlist_id = 'UU' + channel_playlist_id[2:]
return self.url_result(
@@ -1987,6 +2024,15 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
for video_id, video_title in self.extract_videos_from_page(channel_page)]
return self.playlist_result(entries, channel_id)
try:
next(self._entries(channel_page, channel_id))
except StopIteration:
alert_message = self._html_search_regex(
r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
channel_page, 'alert', default=None, group='alert')
if alert_message:
raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
@@ -2000,7 +2046,8 @@ class YoutubeUserIE(YoutubeChannelIE):
'url': 'https://www.youtube.com/user/TheLinuxFoundation',
'playlist_mincount': 320,
'info_dict': {
'title': 'TheLinuxFoundation',
'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
'title': 'Uploads from The Linux Foundation',
}
}, {
'url': 'ytuser:phihag',
@@ -2008,6 +2055,10 @@ class YoutubeUserIE(YoutubeChannelIE):
}, {
'url': 'https://www.youtube.com/c/gametrailers',
'only_matching': True,
}, {
# This channel is not available.
'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
'only_matching': True,
}]
@classmethod
+1 -1
View File
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2016.07.05'
__version__ = '2016.07.06'