mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2026-06-11 23:20:15 +00:00
Compare commits
31 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 16a09aefe3 | |||
| fa7a6e6de6 | |||
| ab87c26009 | |||
| 91bc57e4c5 | |||
| e5d39886ec | |||
| 751c89a27d | |||
| c2d7d76efd | |||
| f67177cae8 | |||
| 5935ef3c5d | |||
| 1183e22c7e | |||
| 72950c4dce | |||
| 334f41e0d8 | |||
| 022250a594 | |||
| 76b5f99617 | |||
| d35dc344af | |||
| 51350db5a3 | |||
| 0563f7ac6e | |||
| 413c1f8e2f | |||
| 8068296276 | |||
| 4db79fa1bc | |||
| b2a19e3829 | |||
| 3266d08af2 | |||
| 0254f93b08 | |||
| 40158f55c9 | |||
| 3dfceb286c | |||
| 57e2cb6c3a | |||
| 6f1b2374af | |||
| d24bd4ffdb | |||
| b9859e2735 | |||
| 34c3bbe1bd | |||
| e5106ba402 |
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.14**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.17*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.17**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.04.14
|
||||
[debug] youtube-dl version 2017.04.17
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
||||
@@ -211,3 +211,4 @@ Juanjo Benages
|
||||
Xiao Di Guan
|
||||
Thomas Winant
|
||||
Daniel Twardowski
|
||||
Jeremie Jarosh
|
||||
|
||||
@@ -1,3 +1,38 @@
|
||||
version 2017.04.17
|
||||
|
||||
Extractors
|
||||
* [limelight] Improve extraction of LimelightEmbeddedPlayerFlash media embeds and
|
||||
add support for channel and channelList embeds
|
||||
* [generic] Extract multiple Limelight embeds (#12761)
|
||||
+ [itv] Extract series metadata
|
||||
* [itv] Fix RTMP formats downloading (#12759)
|
||||
* [itv] Use native HLS downloader by default
|
||||
+ [go90] Extract subtitles (#12752)
|
||||
+ [go90] Extract series metadata (#12752)
|
||||
|
||||
|
||||
version 2017.04.16
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Apply expand_path after output template substitution
|
||||
+ [YoutubeDL] Propagate overridden meta fields to extraction results of type
|
||||
url (#11163)
|
||||
|
||||
Extractors
|
||||
+ [generic] Extract RSS entries as url_transparent (#11163)
|
||||
+ [streamango] Add support for streamango.com (#12643)
|
||||
+ [wsj:article] Add support for articles (#12558)
|
||||
* [brightcove] Relax video tag embeds extraction and validate ambiguous embeds'
|
||||
URLs (#9163, #12005, #12178, #12480)
|
||||
+ [udemy] Add support for react rendition (#12744)
|
||||
|
||||
|
||||
version 2017.04.15
|
||||
|
||||
Extractors
|
||||
* [youku] Fix fileid extraction (#12741, #12743)
|
||||
|
||||
|
||||
version 2017.04.14
|
||||
|
||||
Core
|
||||
|
||||
@@ -745,6 +745,7 @@
|
||||
- **Steam**
|
||||
- **Stitcher**
|
||||
- **Streamable**
|
||||
- **Streamango**
|
||||
- **streamcloud.eu**
|
||||
- **StreamCZ**
|
||||
- **StreetVoice**
|
||||
@@ -966,6 +967,7 @@
|
||||
- **wrzuta.pl**
|
||||
- **wrzuta.pl:playlist**
|
||||
- **WSJ**: Wall Street Journal
|
||||
- **WSJArticle**
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo
|
||||
|
||||
@@ -755,6 +755,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
'_type': 'url_transparent',
|
||||
'url': 'foo2:',
|
||||
'ie_key': 'Foo2',
|
||||
'title': 'foo1 title'
|
||||
}
|
||||
|
||||
class Foo2IE(InfoExtractor):
|
||||
@@ -771,7 +772,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
_VALID_URL = r'foo3:'
|
||||
|
||||
def _real_extract(self, url):
|
||||
return _make_result([{'url': TEST_URL}])
|
||||
return _make_result([{'url': TEST_URL}], title='foo3 title')
|
||||
|
||||
ydl.add_info_extractor(Foo1IE(ydl))
|
||||
ydl.add_info_extractor(Foo2IE(ydl))
|
||||
@@ -779,6 +780,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
ydl.extract_info('foo1:')
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['url'], TEST_URL)
|
||||
self.assertEqual(downloaded['title'], 'foo1 title')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -672,8 +672,7 @@ class YoutubeDL(object):
|
||||
FORMAT_RE.format(numeric_field),
|
||||
r'%({0})s'.format(numeric_field), outtmpl)
|
||||
|
||||
tmpl = expand_path(outtmpl)
|
||||
filename = tmpl % template_dict
|
||||
filename = expand_path(outtmpl % template_dict)
|
||||
# Temporary fix for #4787
|
||||
# 'Treat' all problem characters by passing filename through preferredencoding
|
||||
# to workaround encoding issues with subprocess on python2 @ Windows
|
||||
@@ -851,7 +850,14 @@ class YoutubeDL(object):
|
||||
new_result = info.copy()
|
||||
new_result.update(force_properties)
|
||||
|
||||
assert new_result.get('_type') != 'url_transparent'
|
||||
# Extracted info may not be a video result (i.e.
|
||||
# info.get('_type', 'video') != video) but rather an url or
|
||||
# url_transparent. In such cases outer metadata (from ie_result)
|
||||
# should be propagated to inner one (info). For this to happen
|
||||
# _type of info should be overridden with url_transparent. This
|
||||
# fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
|
||||
if new_result.get('_type') == 'url':
|
||||
new_result['_type'] = 'url_transparent'
|
||||
|
||||
return self.process_ie_result(
|
||||
new_result, download=download, extra_info=extra_info)
|
||||
|
||||
@@ -17,6 +17,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
@@ -109,6 +110,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'upload_date': '20140827',
|
||||
'uploader_id': '710858724001',
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
},
|
||||
{
|
||||
# playlist with 'videoList'
|
||||
@@ -487,12 +489,13 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
return urls[0] if urls else None
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
def _extract_urls(ie, webpage):
|
||||
# Reference:
|
||||
# 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
|
||||
# 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
|
||||
# 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html
|
||||
# 4. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
|
||||
# 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#tag
|
||||
# 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
|
||||
# 4. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/in-page-embed-player-implementation.html
|
||||
# 5. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
|
||||
|
||||
entries = []
|
||||
|
||||
@@ -501,22 +504,48 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
|
||||
entries.append(url if url.startswith('http') else 'http:' + url)
|
||||
|
||||
# Look for embed_in_page embeds [2]
|
||||
for video_id, account_id, player_id, embed in re.findall(
|
||||
# According to examples from [3] it's unclear whether video id
|
||||
# may be optional and what to do when it is
|
||||
# According to [4] data-video-id may be prefixed with ref:
|
||||
r'''(?sx)
|
||||
<video[^>]+
|
||||
data-video-id=["\'](\d+|ref:[^"\']+)["\'][^>]*>.*?
|
||||
</video>.*?
|
||||
<script[^>]+
|
||||
src=["\'](?:https?:)?//players\.brightcove\.net/
|
||||
(\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
|
||||
# Look for <video> tags [2] and embed_in_page embeds [3]
|
||||
# [2] looks like:
|
||||
for video, script_tag, account_id, player_id, embed in re.findall(
|
||||
r'''(?isx)
|
||||
(<video\s+[^>]+>)
|
||||
(?:.*?
|
||||
(<script[^>]+
|
||||
src=["\'](?:https?:)?//players\.brightcove\.net/
|
||||
(\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
|
||||
)
|
||||
)?
|
||||
''', webpage):
|
||||
entries.append(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
|
||||
% (account_id, player_id, embed, video_id))
|
||||
attrs = extract_attributes(video)
|
||||
|
||||
# According to examples from [4] it's unclear whether video id
|
||||
# may be optional and what to do when it is
|
||||
video_id = attrs.get('data-video-id')
|
||||
if not video_id:
|
||||
continue
|
||||
|
||||
account_id = account_id or attrs.get('data-account')
|
||||
if not account_id:
|
||||
continue
|
||||
|
||||
player_id = player_id or attrs.get('data-player') or 'default'
|
||||
embed = embed or attrs.get('data-embed') or 'default'
|
||||
|
||||
bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % (
|
||||
account_id, player_id, embed, video_id)
|
||||
|
||||
# Some brightcove videos may be embedded with video tag only and
|
||||
# without script tag or any mentioning of brightcove at all. Such
|
||||
# embeds are considered ambiguous since they are matched based only
|
||||
# on data-video-id and data-account attributes and in the wild may
|
||||
# not be brightcove embeds at all. Let's check reconstructed
|
||||
# brightcove URLs in case of such embeds and only process valid
|
||||
# ones. By this we ensure there is indeed a brightcove embed.
|
||||
if not script_tag and not ie._is_valid_url(
|
||||
bc_url, video_id, 'possible brightcove video'):
|
||||
continue
|
||||
|
||||
entries.append(bc_url)
|
||||
|
||||
return entries
|
||||
|
||||
|
||||
@@ -7,8 +7,8 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
# ExtractorError,
|
||||
# HEADRequest,
|
||||
int_or_none,
|
||||
qualities,
|
||||
remove_end,
|
||||
|
||||
@@ -48,7 +48,6 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
||||
|
||||
def _extract_media_info(self, media):
|
||||
video_id = compat_str(media['id'])
|
||||
limelight_media_id = media['limelight_media_id']
|
||||
title = media['title']
|
||||
|
||||
formats = []
|
||||
|
||||
@@ -939,6 +939,7 @@ from .srmediathek import SRMediathekIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
from .streamable import StreamableIE
|
||||
from .streamango import StreamangoIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .streetvoice import StreetVoiceIE
|
||||
@@ -1233,7 +1234,10 @@ from .wrzuta import (
|
||||
WrzutaIE,
|
||||
WrzutaPlaylistIE,
|
||||
)
|
||||
from .wsj import WSJIE
|
||||
from .wsj import (
|
||||
WSJIE,
|
||||
WSJArticleIE,
|
||||
)
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xfileshare import XFileShareIE
|
||||
|
||||
@@ -85,6 +85,7 @@ from .ustream import UstreamIE
|
||||
from .openload import OpenloadIE
|
||||
from .videopress import VideoPressIE
|
||||
from .rutube import RutubeIE
|
||||
from .limelight import LimelightBaseIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -449,6 +450,59 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
}],
|
||||
},
|
||||
{
|
||||
# Brightcove with UUID in videoPlayer
|
||||
'url': 'http://www8.hp.com/cn/zh/home.html',
|
||||
'info_dict': {
|
||||
'id': '5255815316001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprocket Video - China',
|
||||
'description': 'Sprocket Video - China',
|
||||
'uploader': 'HP-Video Gallery',
|
||||
'timestamp': 1482263210,
|
||||
'upload_date': '20161220',
|
||||
'uploader_id': '1107601872001',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
'skip': 'video rotates...weekly?',
|
||||
},
|
||||
{
|
||||
# Brightcove:new type [2].
|
||||
'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
|
||||
'md5': '2b35148fcf48da41c9fb4591650784f3',
|
||||
'info_dict': {
|
||||
'id': '5348741021001',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20170306',
|
||||
'uploader_id': '4191638492001',
|
||||
'timestamp': 1488769918,
|
||||
'title': 'VIDEO: St. Thomas More earns first trip to basketball semis',
|
||||
|
||||
},
|
||||
},
|
||||
{
|
||||
# Alternative brightcove <video> attributes
|
||||
'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
|
||||
'info_dict': {
|
||||
'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
|
||||
'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '732d22ba3d33f2f3fc253c39f8f36523',
|
||||
'info_dict': {
|
||||
'id': '5311302538001',
|
||||
'ext': 'mp4',
|
||||
'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
|
||||
'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
|
||||
'timestamp': 1486321708,
|
||||
'upload_date': '20170205',
|
||||
'uploader_id': '800000640001',
|
||||
},
|
||||
'only_matching': True,
|
||||
}],
|
||||
},
|
||||
{
|
||||
# Brightcove with UUID in videoPlayer
|
||||
'url': 'http://www8.hp.com/cn/zh/home.html',
|
||||
@@ -1598,6 +1652,15 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': [SenateISVPIE.ie_key()],
|
||||
},
|
||||
{
|
||||
# Limelight embeds (1 channel embed + 4 media embeds)
|
||||
'url': 'http://www.sedona.com/FacilitatorTraining2017',
|
||||
'info_dict': {
|
||||
'id': 'FacilitatorTraining2017',
|
||||
'title': 'Facilitator Training 2017',
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@@ -1640,7 +1703,7 @@ class GenericIE(InfoExtractor):
|
||||
continue
|
||||
|
||||
entries.append({
|
||||
'_type': 'url',
|
||||
'_type': 'url_transparent',
|
||||
'url': next_url,
|
||||
'title': it.find('title').text,
|
||||
})
|
||||
@@ -1900,7 +1963,6 @@ class GenericIE(InfoExtractor):
|
||||
# Look for Brightcove Legacy Studio embeds
|
||||
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
|
||||
if bc_urls:
|
||||
self.to_screen('Brightcove video detected.')
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'url': smuggle_url(bc_url, {'Referer': url}),
|
||||
@@ -1915,7 +1977,7 @@ class GenericIE(InfoExtractor):
|
||||
}
|
||||
|
||||
# Look for Brightcove New Studio embeds
|
||||
bc_urls = BrightcoveNewIE._extract_urls(webpage)
|
||||
bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
|
||||
if bc_urls:
|
||||
return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
|
||||
|
||||
@@ -2431,6 +2493,11 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(piksel_url, PikselIE.ie_key())
|
||||
|
||||
# Look for Limelight embeds
|
||||
limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
|
||||
if limelight_urls:
|
||||
return self.playlist_result(
|
||||
limelight_urls, video_id, video_title, video_description)
|
||||
|
||||
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
|
||||
if mobj:
|
||||
lm = {
|
||||
|
||||
@@ -5,6 +5,7 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
@@ -18,7 +19,7 @@ class Go90IE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '84BUqjLpf9D',
|
||||
'ext': 'mp4',
|
||||
'title': 'Inside The Utah Coalition Against Pornography Convention',
|
||||
'title': 'Daily VICE - Inside The Utah Coalition Against Pornography Convention',
|
||||
'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.',
|
||||
'timestamp': 1491868800,
|
||||
'upload_date': '20170411',
|
||||
@@ -32,11 +33,28 @@ class Go90IE(InfoExtractor):
|
||||
video_id, headers={
|
||||
'Content-Type': 'application/json; charset=utf-8',
|
||||
}, data=b'{"client":"web","device_type":"pc"}')
|
||||
title = video_data['title']
|
||||
main_video_asset = video_data['main_video_asset']
|
||||
|
||||
episode_number = int_or_none(video_data.get('episode_number'))
|
||||
series = None
|
||||
season = None
|
||||
season_id = None
|
||||
season_number = None
|
||||
for metadata in video_data.get('__children', {}).get('Item', {}).values():
|
||||
if metadata.get('type') == 'show':
|
||||
series = metadata.get('title')
|
||||
elif metadata.get('type') == 'season':
|
||||
season = metadata.get('title')
|
||||
season_id = metadata.get('id')
|
||||
season_number = int_or_none(metadata.get('season_number'))
|
||||
|
||||
title = episode = video_data.get('title') or series
|
||||
if series and series != title:
|
||||
title = '%s - %s' % (series, title)
|
||||
|
||||
thumbnails = []
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for asset in video_data.get('assets'):
|
||||
if asset.get('id') == main_video_asset:
|
||||
for source in asset.get('sources', []):
|
||||
@@ -70,6 +88,15 @@ class Go90IE(InfoExtractor):
|
||||
'height': int_or_none(source.get('height')),
|
||||
'tbr': int_or_none(source.get('bitrate')),
|
||||
})
|
||||
|
||||
for caption in asset.get('caption_metadata', []):
|
||||
caption_url = caption.get('source_url')
|
||||
if not caption_url:
|
||||
continue
|
||||
subtitles.setdefault(caption.get('language', 'en'), []).append({
|
||||
'url': caption_url,
|
||||
'ext': determine_ext(caption_url, 'vtt'),
|
||||
})
|
||||
elif asset.get('type') == 'image':
|
||||
asset_location = asset.get('location')
|
||||
if not asset_location:
|
||||
@@ -89,4 +116,11 @@ class Go90IE(InfoExtractor):
|
||||
'description': video_data.get('short_description'),
|
||||
'like_count': int_or_none(video_data.get('like_count')),
|
||||
'timestamp': parse_iso8601(video_data.get('released_at')),
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
'season': season,
|
||||
'season_id': season_id,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
@@ -116,13 +116,25 @@ class ITVIE(InfoExtractor):
|
||||
if not play_path:
|
||||
continue
|
||||
tbr = int_or_none(media_file.get('bitrate'), 1000)
|
||||
formats.append({
|
||||
f = {
|
||||
'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
# Providing this swfVfy avoids truncated downloads
|
||||
'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
|
||||
'page_url': url,
|
||||
'tbr': tbr,
|
||||
'ext': 'flv',
|
||||
})
|
||||
}
|
||||
app = self._search_regex(
|
||||
'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
|
||||
if app:
|
||||
f.update({
|
||||
'url': rtmp_url.split('?', 1)[0],
|
||||
'app': app,
|
||||
})
|
||||
else:
|
||||
f['url'] = rtmp_url
|
||||
formats.append(f)
|
||||
|
||||
ios_playlist_url = params.get('data-video-playlist')
|
||||
hmac = params.get('data-video-hmac')
|
||||
@@ -172,7 +184,9 @@ class ITVIE(InfoExtractor):
|
||||
href = ios_base_url + href
|
||||
ext = determine_ext(href)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(href, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': href,
|
||||
@@ -189,7 +203,8 @@ class ITVIE(InfoExtractor):
|
||||
'ext': 'ttml' if ext == 'xml' else ext,
|
||||
})
|
||||
|
||||
return {
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
@@ -198,4 +213,5 @@ class ITVIE(InfoExtractor):
|
||||
'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
|
||||
'series': xpath_text(playlist, 'ProgrammeTitle'),
|
||||
'duartion': parse_duration(xpath_text(playlist, 'Duration')),
|
||||
}
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -9,6 +9,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -18,6 +19,42 @@ class LimelightBaseIE(InfoExtractor):
|
||||
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
|
||||
_API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
|
||||
|
||||
@classmethod
|
||||
def _extract_urls(cls, webpage, source_url):
|
||||
lm = {
|
||||
'Media': 'media',
|
||||
'Channel': 'channel',
|
||||
'ChannelList': 'channel_list',
|
||||
}
|
||||
entries = []
|
||||
for kind, video_id in re.findall(
|
||||
r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
|
||||
webpage):
|
||||
entries.append(cls.url_result(
|
||||
smuggle_url(
|
||||
'limelight:%s:%s' % (lm[kind], video_id),
|
||||
{'source_url': source_url}),
|
||||
'Limelight%s' % kind, video_id))
|
||||
for mobj in re.finditer(
|
||||
# As per [1] class attribute should be exactly equal to
|
||||
# LimelightEmbeddedPlayerFlash but numerous examples have been seen
|
||||
# that don't exactly match it (e.g. [2]).
|
||||
# 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
|
||||
# 2. http://www.sedona.com/FacilitatorTraining2017
|
||||
r'''(?sx)
|
||||
<object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
|
||||
<param[^>]+
|
||||
name=(["\'])flashVars\2[^>]+
|
||||
value=(["\'])(?:(?!\3).)*(?P<kind>media|channel(?:List)?)Id=(?P<id>[a-z0-9]{32})
|
||||
''', webpage):
|
||||
kind, video_id = mobj.group('kind'), mobj.group('id')
|
||||
entries.append(cls.url_result(
|
||||
smuggle_url(
|
||||
'limelight:%s:%s' % (kind, video_id),
|
||||
{'source_url': source_url}),
|
||||
'Limelight%s' % kind.capitalize(), video_id))
|
||||
return entries
|
||||
|
||||
def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
|
||||
headers = {}
|
||||
if referer:
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
)
|
||||
|
||||
|
||||
class StreamangoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f|embed)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
|
||||
'md5': 'e992787515a182f55e38fc97588d802a',
|
||||
'info_dict': {
|
||||
'id': 'clapasobsptpkdfe',
|
||||
'ext': 'mp4',
|
||||
'title': '20170315_150006.mp4',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
formats = []
|
||||
for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
|
||||
video = self._parse_json(
|
||||
format_, video_id, transform_source=js_to_json, fatal=False)
|
||||
if not video:
|
||||
continue
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
continue
|
||||
ext = determine_ext(src, default_ext=None)
|
||||
if video.get('type') == 'application/dash+xml' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': src,
|
||||
'ext': ext or 'mp4',
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'tbr': int_or_none(video.get('bitrate')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': url,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
}
|
||||
@@ -212,12 +212,15 @@ class UdemyIE(InfoExtractor):
|
||||
thumbnail = asset.get('thumbnail_url') or asset.get('thumbnailUrl')
|
||||
duration = float_or_none(asset.get('data', {}).get('duration'))
|
||||
|
||||
subtitles = {}
|
||||
automatic_captions = {}
|
||||
|
||||
formats = []
|
||||
|
||||
def extract_output_format(src):
|
||||
def extract_output_format(src, f_id):
|
||||
return {
|
||||
'url': src['url'],
|
||||
'format_id': '%sp' % (src.get('height') or format_id),
|
||||
'format_id': '%sp' % (src.get('height') or f_id),
|
||||
'width': int_or_none(src.get('width')),
|
||||
'height': int_or_none(src.get('height')),
|
||||
'vbr': int_or_none(src.get('video_bitrate_in_kbps')),
|
||||
@@ -237,30 +240,33 @@ class UdemyIE(InfoExtractor):
|
||||
def add_output_format_meta(f, key):
|
||||
output = outputs.get(key)
|
||||
if isinstance(output, dict):
|
||||
output_format = extract_output_format(output)
|
||||
output_format = extract_output_format(output, key)
|
||||
output_format.update(f)
|
||||
return output_format
|
||||
return f
|
||||
|
||||
def extract_formats(source_list):
|
||||
if not isinstance(source_list, list):
|
||||
return
|
||||
for source in source_list:
|
||||
video_url = source.get('file') or source.get('src')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
continue
|
||||
format_id = source.get('label')
|
||||
f = {
|
||||
'url': video_url,
|
||||
'format_id': '%sp' % format_id,
|
||||
'height': int_or_none(format_id),
|
||||
}
|
||||
if format_id:
|
||||
# Some videos contain additional metadata (e.g.
|
||||
# https://www.udemy.com/ios9-swift/learn/#/lecture/3383208)
|
||||
f = add_output_format_meta(f, format_id)
|
||||
formats.append(f)
|
||||
|
||||
download_urls = asset.get('download_urls')
|
||||
if isinstance(download_urls, dict):
|
||||
video = download_urls.get('Video')
|
||||
if isinstance(video, list):
|
||||
for format_ in video:
|
||||
video_url = format_.get('file')
|
||||
if not video_url:
|
||||
continue
|
||||
format_id = format_.get('label')
|
||||
f = {
|
||||
'url': format_['file'],
|
||||
'format_id': '%sp' % format_id,
|
||||
'height': int_or_none(format_id),
|
||||
}
|
||||
if format_id:
|
||||
# Some videos contain additional metadata (e.g.
|
||||
# https://www.udemy.com/ios9-swift/learn/#/lecture/3383208)
|
||||
f = add_output_format_meta(f, format_id)
|
||||
formats.append(f)
|
||||
extract_formats(download_urls.get('Video'))
|
||||
|
||||
view_html = lecture.get('view_html')
|
||||
if view_html:
|
||||
@@ -294,6 +300,35 @@ class UdemyIE(InfoExtractor):
|
||||
'height': height,
|
||||
}, res))
|
||||
|
||||
# react rendition since 2017.04.15 (see
|
||||
# https://github.com/rg3/youtube-dl/issues/12744)
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'videojs-setup-data=(["\'])(?P<data>{.+?})\1', view_html,
|
||||
'setup data', default='{}', group='data'), video_id,
|
||||
transform_source=unescapeHTML, fatal=False)
|
||||
if data and isinstance(data, dict):
|
||||
extract_formats(data.get('sources'))
|
||||
if not duration:
|
||||
duration = int_or_none(data.get('duration'))
|
||||
tracks = data.get('tracks')
|
||||
if isinstance(tracks, list):
|
||||
for track in tracks:
|
||||
if not isinstance(track, dict):
|
||||
continue
|
||||
if track.get('kind') != 'captions':
|
||||
continue
|
||||
src = track.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
continue
|
||||
lang = track.get('language') or track.get(
|
||||
'srclang') or track.get('label')
|
||||
sub_dict = automatic_captions if track.get(
|
||||
'autogenerated') is True else subtitles
|
||||
sub_dict.setdefault(lang, []).append({
|
||||
'url': src,
|
||||
})
|
||||
|
||||
self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
|
||||
|
||||
return {
|
||||
@@ -302,7 +337,9 @@ class UdemyIE(InfoExtractor):
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'automatic_captions': automatic_captions,
|
||||
}
|
||||
|
||||
|
||||
|
||||
+40
-12
@@ -10,12 +10,14 @@ from ..utils import (
|
||||
|
||||
|
||||
class WSJIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:
|
||||
video-api\.wsj\.com/api-video/player/iframe\.html\?guid=|
|
||||
(?:www\.)?wsj\.com/video/[^/]+/
|
||||
)
|
||||
(?P<id>[a-zA-Z0-9-]+)'''
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=|
|
||||
https?://(?:www\.)?wsj\.com/video/[^/]+/|
|
||||
wsj:
|
||||
)
|
||||
(?P<id>[a-fA-F0-9-]{36})
|
||||
'''
|
||||
IE_DESC = 'Wall Street Journal'
|
||||
_TESTS = [{
|
||||
'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
|
||||
@@ -38,12 +40,17 @@ class WSJIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
api_url = (
|
||||
'http://video-api.wsj.com/api-video/find_all_videos.asp?'
|
||||
'type=guid&count=1&query=%s&fields=type,hls,videoMP4List,'
|
||||
'thumbnailList,author,description,name,duration,videoURL,'
|
||||
'titletag,formattedCreationDate,keywords,editor' % video_id)
|
||||
info = self._download_json(api_url, video_id)['items'][0]
|
||||
info = self._download_json(
|
||||
'http://video-api.wsj.com/api-video/find_all_videos.asp', video_id,
|
||||
query={
|
||||
'type': 'guid',
|
||||
'count': 1,
|
||||
'query': video_id,
|
||||
'fields': ','.join((
|
||||
'type', 'hls', 'videoMP4List', 'thumbnailList', 'author',
|
||||
'description', 'name', 'duration', 'videoURL', 'titletag',
|
||||
'formattedCreationDate', 'keywords', 'editor')),
|
||||
})['items'][0]
|
||||
title = info.get('name', info.get('titletag'))
|
||||
|
||||
formats = []
|
||||
@@ -87,3 +94,24 @@ class WSJIE(InfoExtractor):
|
||||
'title': title,
|
||||
'categories': info.get('keywords'),
|
||||
}
|
||||
|
||||
|
||||
class WSJArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'(?i)https?://(?:www\.)?wsj\.com/articles/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.wsj.com/articles/dont-like-china-no-pandas-for-you-1490366939?',
|
||||
'info_dict': {
|
||||
'id': '4B13FA62-1D8C-45DB-8EA1-4105CB20B362',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20170221',
|
||||
'uploader_id': 'ralcaraz',
|
||||
'title': 'Bao Bao the Panda Leaves for China',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
article_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, article_id)
|
||||
video_id = self._search_regex(
|
||||
r'data-src=["\']([a-fA-F0-9-]{36})', webpage, 'video id')
|
||||
return self.url_result('wsj:%s' % video_id, WSJIE.ie_key(), video_id)
|
||||
|
||||
@@ -10,12 +10,14 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_ord,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
get_element_by_attribute,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -105,7 +107,9 @@ class YoukuIE(InfoExtractor):
|
||||
if stream.get('channel_type') == 'tail':
|
||||
continue
|
||||
format = stream.get('stream_type')
|
||||
fileid = stream['stream_fileid']
|
||||
fileid = try_get(
|
||||
stream, lambda x: x['segs'][0]['fileid'],
|
||||
compat_str) or stream['stream_fileid']
|
||||
fileid_dict[format] = fileid
|
||||
|
||||
def get_fileid(format, n):
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2017.04.14'
|
||||
__version__ = '2017.04.17'
|
||||
|
||||
Reference in New Issue
Block a user