From 6cbb20bb090845898fcc368beed45708f05bf908 Mon Sep 17 00:00:00 2001 From: DarkstaIkers Date: Tue, 29 Mar 2016 14:26:24 -0300 Subject: [PATCH 001/862] Update crunchyroll.py --- youtube_dl/extractor/crunchyroll.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 8ae3f2890..44c720aaa 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -184,7 +184,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style'] output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x'] output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y'] - output += """ScaledBorderAndShadow: yes + output += """ScaledBorderAndShadow: no [V4+ Styles] Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding From 33898fb19c1af161c503ebce8f9a4774fecee45e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 6 Oct 2016 10:45:57 +0100 Subject: [PATCH 002/862] [nzz] Add new extractor(#4407) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nzz.py | 36 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 youtube_dl/extractor/nzz.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index feee06004..72bc4f57c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -638,6 +638,7 @@ from .nytimes import ( NYTimesArticleIE, ) from .nuvid import NuvidIE +from .nzz import NZZIE from .odatv import OdaTVIE from .odnoklassniki import OdnoklassnikiIE from .oktoberfesttv import OktoberfestTVIE diff --git a/youtube_dl/extractor/nzz.py b/youtube_dl/extractor/nzz.py new file mode 100644 index 000000000..2d352f53f --- /dev/null +++ b/youtube_dl/extractor/nzz.py @@ -0,0 +1,36 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + extract_attributes, +) + + +class NZZIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P\d+)' + _TEST = { + 'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153', + 'info_dict': { + 'id': '9153', + }, + 'playlist_mincount': 6, + } + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + + entries = [] + for player_element in re.findall(r'(<[^>]+class="kalturaPlayer"[^>]*>)', webpage): + player_params = extract_attributes(player_element) + if player_params.get('data-type') not in ('kaltura_singleArticle',): + self.report_warning('Unsupported player type') + continue + entry_id = player_params['data-id'] + entries.append(self.url_result( + 'kaltura:1750922:' + entry_id, 'Kaltura', entry_id)) + + return self.playlist_result(entries, page_id) From 09b9c45e242cb9e85beaa98b4783ec02065f1ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 6 Oct 2016 23:22:52 +0700 Subject: [PATCH 003/862] [generic] Add support for multiple vimeo embeds (Closes #10862) --- youtube_dl/extractor/generic.py | 6 +++--- youtube_dl/extractor/vimeo.py | 36 ++++++++++++++++++--------------- youtube_dl/extractor/vk.py | 2 +- 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9ea306e3a..8ef8fb5f4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1754,9 +1754,9 @@ class GenericIE(InfoExtractor): if matches: return _playlist_from_matches(matches, ie='RtlNl') - vimeo_url = VimeoIE._extract_vimeo_url(url, webpage) - if vimeo_url is not None: - return self.url_result(vimeo_url) + vimeo_urls = VimeoIE._extract_urls(url, webpage) + if vimeo_urls: + return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key()) vid_me_embed_url = self._search_regex( r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]', diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 309a47bf0..ea8fc5908 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -355,23 +355,27 @@ class VimeoIE(VimeoBaseInfoExtractor): return smuggle_url(url, {'http_headers': {'Referer': referrer_url}}) @staticmethod - def _extract_vimeo_url(url, webpage): + def _extract_urls(url, webpage): + urls = [] # Look for embedded (iframe) Vimeo player - mobj = re.search( - r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) - if mobj: - player_url = unescapeHTML(mobj.group('url')) - return VimeoIE._smuggle_referrer(player_url, url) - # Look for embedded (swf embed) Vimeo player - mobj = re.search( - r']+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) - if mobj: - return mobj.group(1) - # Look more for non-standard embedded Vimeo player - mobj = re.search( - r']+src=(?P[\'"])(?P(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)(?P=q1)', webpage) - if mobj: - return mobj.group('url') + for mobj in re.finditer( + r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage): + urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url)) + PLAIN_EMBED_RE = ( + # Look for embedded (swf embed) Vimeo player + r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1', + # Look more for non-standard embedded Vimeo player + r']+src=(["\'])(?P(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1', + ) + for embed_re in PLAIN_EMBED_RE: + for mobj in re.finditer(embed_re, webpage): + urls.append(mobj.group('url')) + return urls + + @staticmethod + def _extract_url(url, webpage): + urls = VimeoIE._extract_urls(url, webpage) + return urls[0] if urls else None def _verify_player_video_password(self, url, video_id): password = self._downloader.params.get('videopassword') diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index ac77bc623..df43ba867 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -341,7 +341,7 @@ class VKIE(VKBaseIE): if youtube_url: return self.url_result(youtube_url, 'Youtube') - vimeo_url = VimeoIE._extract_vimeo_url(url, info_page) + vimeo_url = VimeoIE._extract_url(url, info_page) if vimeo_url is not None: return self.url_result(vimeo_url) From 831a34caa2112a9b2d867e05f8a4debf965e8389 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 03:28:41 +0800 Subject: [PATCH 004/862] [Makefilea] Fix for GNU make < 4 Closes #9387 The shell assignment operator != was introduced in GNU make 4.0, or specifically the commit in [1]. This fix removes such usages and fallback to a more portable syntax. Tested with: * GNU make 3.82 on CentOS 7.2 * bmake 20150910 on CentOS 7.2, source RPM from Fedora 24 [2] * GNU make 4.2.1 on Arch Linux (Arch official package) * bmake 20160926 on Arch Linux (Arch official package) * GNU make 3.82 on Arch Linux (Compiled from source) * Apple bsdmake-24 on macOS Sierra, binary package from Homebrew Thanks @bdeyal for the feedback of the first tests [1] http://git.savannah.gnu.org/cgit/make.git/commit/?id=b34438bee83ee906a23b881f257e684a0993b9b1 [2] http://koji.fedoraproject.org/koji/buildinfo?buildID=716769 --- ChangeLog | 6 ++++++ Makefile | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4f64edabb..be1cf90fb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Core +* Support for GNU make < 4 is fixed (#9387) + + version 2016.10.02 Core diff --git a/Makefile b/Makefile index a2763a664..7393e3e1e 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ SHAREDIR ?= $(PREFIX)/share PYTHON ?= /usr/bin/env python # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local -SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi +SYSCONFDIR = $$(if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish install -d $(DESTDIR)$(BINDIR) @@ -90,7 +90,7 @@ fish-completion: youtube-dl.fish lazy-extractors: youtube_dl/extractor/lazy_extractors.py -_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py' +_EXTRACTOR_FILES = $$(find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py') youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) $(PYTHON) devscripts/make_lazy_extractors.py $@ From c0a7b9b348bb580d32fc94ee90c1b3b02b668a9e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 16:02:53 +0800 Subject: [PATCH 005/862] Revert "[Makefilea] Fix for GNU make < 4" This reverts commit 831a34caa2112a9b2d867e05f8a4debf965e8389. The reverted commit breaks lazy extractors. --- ChangeLog | 6 ------ Makefile | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index be1cf90fb..4f64edabb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,9 +1,3 @@ -version - -Core -* Support for GNU make < 4 is fixed (#9387) - - version 2016.10.02 Core diff --git a/Makefile b/Makefile index 7393e3e1e..a2763a664 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ SHAREDIR ?= $(PREFIX)/share PYTHON ?= /usr/bin/env python # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local -SYSCONFDIR = $$(if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) +SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish install -d $(DESTDIR)$(BINDIR) @@ -90,7 +90,7 @@ fish-completion: youtube-dl.fish lazy-extractors: youtube_dl/extractor/lazy_extractors.py -_EXTRACTOR_FILES = $$(find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py') +_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py' youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) $(PYTHON) devscripts/make_lazy_extractors.py $@ From 3d83a1ae924902a0421bea8e2e6cd57bb34ee299 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 17:50:45 +0800 Subject: [PATCH 006/862] [generic] Support direct MMS links (closes #10838) --- ChangeLog | 6 ++++++ youtube_dl/extractor/generic.py | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4f64edabb..55e60758d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [generic] Support direct MMS links (#10838) + + version 2016.10.02 Core diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8ef8fb5f4..1f18cbfe9 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1412,6 +1412,18 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 3, }, + { + # Direct MMS link + 'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv', + 'info_dict': { + 'id': 'MilesReid(0709)', + 'ext': 'wmv', + 'title': 'MilesReid(0709)', + }, + 'params': { + 'skip_download': True, # rtsp downloads, requiring mplayer or mpv + }, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -1551,6 +1563,13 @@ class GenericIE(InfoExtractor): else: video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) + if parsed_url.scheme == 'mms': + return { + 'id': video_id, + 'title': video_id, + 'url': url, + } + self.to_screen('%s: Requesting header' % video_id) head_req = HEADRequest(url) From 98763ee354ffc13a57f28dbd006729affacb6d30 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 19:20:53 +0800 Subject: [PATCH 007/862] [extractor/common] Add id and title helpers for generic IEs --- youtube_dl/extractor/common.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1076b46da..da192728f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -21,6 +21,7 @@ from ..compat import ( compat_os_name, compat_str, compat_urllib_error, + compat_urllib_parse_unquote, compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, @@ -2020,6 +2021,12 @@ class InfoExtractor(object): headers['Ytdl-request-proxy'] = geo_verification_proxy return headers + def _generic_id(self, url): + return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) + + def _generic_title(self, url): + return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]) + class SearchInfoExtractor(InfoExtractor): """ From 9dcd6fd3aae77571116ee8b823b6b9224d0ef2ad Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 19:22:30 +0800 Subject: [PATCH 008/862] [generic,commonprotocols] Move mms suuport from GenericIE And use _generic_* helpers in those extractors --- ChangeLog | 2 +- youtube_dl/extractor/commonprotocols.py | 36 ++++++++++++++++++++----- youtube_dl/extractor/generic.py | 24 ++--------------- 3 files changed, 33 insertions(+), 29 deletions(-) diff --git a/ChangeLog b/ChangeLog index 55e60758d..3aa4d67f5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,7 @@ version Extractors -+ [generic] Support direct MMS links (#10838) ++ [commonprotocols] Support direct MMS links (#10838) version 2016.10.02 diff --git a/youtube_dl/extractor/commonprotocols.py b/youtube_dl/extractor/commonprotocols.py index 5d130a170..d98331a4e 100644 --- a/youtube_dl/extractor/commonprotocols.py +++ b/youtube_dl/extractor/commonprotocols.py @@ -1,13 +1,9 @@ from __future__ import unicode_literals -import os - from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_unquote, compat_urlparse, ) -from ..utils import url_basename class RtmpIE(InfoExtractor): @@ -23,8 +19,8 @@ class RtmpIE(InfoExtractor): }] def _real_extract(self, url): - video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) - title = compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]) + video_id = self._generic_id(url) + title = self._generic_title(url) return { 'id': video_id, 'title': title, @@ -34,3 +30,31 @@ class RtmpIE(InfoExtractor): 'format_id': compat_urlparse.urlparse(url).scheme, }], } + + +class MmsIE(InfoExtractor): + IE_DESC = False # Do not list + _VALID_URL = r'(?i)mms://.+' + + _TEST = { + # Direct MMS link + 'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv', + 'info_dict': { + 'id': 'MilesReid(0709)', + 'ext': 'wmv', + 'title': 'MilesReid(0709)', + }, + 'params': { + 'skip_download': True, # rtsp downloads, requiring mplayer or mpv + }, + } + + def _real_extract(self, url): + video_id = self._generic_id(url) + title = self._generic_title(url) + + return { + 'id': video_id, + 'title': title, + 'url': url, + } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1f18cbfe9..7b8a9cf9a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -27,7 +27,6 @@ from ..utils import ( unified_strdate, unsmuggle_url, UnsupportedError, - url_basename, xpath_text, ) from .brightcove import ( @@ -1412,18 +1411,6 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 3, }, - { - # Direct MMS link - 'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv', - 'info_dict': { - 'id': 'MilesReid(0709)', - 'ext': 'wmv', - 'title': 'MilesReid(0709)', - }, - 'params': { - 'skip_download': True, # rtsp downloads, requiring mplayer or mpv - }, - }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -1561,14 +1548,7 @@ class GenericIE(InfoExtractor): force_videoid = smuggled_data['force_videoid'] video_id = force_videoid else: - video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) - - if parsed_url.scheme == 'mms': - return { - 'id': video_id, - 'title': video_id, - 'url': url, - } + video_id = self._generic_id(url) self.to_screen('%s: Requesting header' % video_id) @@ -1597,7 +1577,7 @@ class GenericIE(InfoExtractor): info_dict = { 'id': video_id, - 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]), + 'title': self._generic_title(url), 'upload_date': unified_strdate(head_response.headers.get('Last-Modified')) } From 85bcdd081ce0009bcb7135d8d68192d34969e168 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 19:31:26 +0800 Subject: [PATCH 009/862] [extractors] Add MmsIE --- youtube_dl/extractor/extractors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 72bc4f57c..5c1d2abfb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -186,7 +186,10 @@ from .comedycentral import ( ) from .comcarcoff import ComCarCoffIE from .commonmistakes import CommonMistakesIE, UnicodeBOMIE -from .commonprotocols import RtmpIE +from .commonprotocols import ( + MmsIE, + RtmpIE, +) from .condenast import CondeNastIE from .cracked import CrackedIE from .crackle import CrackleIE From 38588ab9770813cb92013b870edc15def4f9ac1c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 20:04:49 +0800 Subject: [PATCH 010/862] [facebook] Fix for new handleServerJS syntax (closes #10846) According to the dump file in #10846, handleServerJS() now accepts an optional second argument. It's a string from available dump files. --- ChangeLog | 1 + youtube_dl/extractor/facebook.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 3aa4d67f5..7aa0787ca 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [facebook] Fix video extraction (#10846) + [commonprotocols] Support direct MMS links (#10838) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 3a220e995..801573459 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -258,7 +258,7 @@ class FacebookIE(InfoExtractor): if not video_data: server_js_data = self._parse_json(self._search_regex( - r'handleServerJS\(({.+})\);', webpage, 'server js data', default='{}'), video_id) + r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id) for item in server_js_data.get('instances', []): if item[1][0] == 'VideoConfig': video_data = video_data_list2dict(item[2][0]['videoData']) From 3c6b3bf2217e91c0d01ca65fa2b013ffa132fdbc Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 7 Oct 2016 15:53:03 +0100 Subject: [PATCH 011/862] [iprima] detect geo restriction --- youtube_dl/extractor/iprima.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 788bbe0d5..da2cdc656 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -81,6 +81,9 @@ class IPrimaIE(InfoExtractor): for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage): extract_formats(src) + if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage: + self.raise_geo_restricted() + self._sort_formats(formats) return { From f475e8812197027ba7770a421e7fc7094ee8ae0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Oct 2016 22:15:26 +0700 Subject: [PATCH 012/862] [vimeo] PEP 8 [ci skip] --- youtube_dl/extractor/vimeo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index ea8fc5908..a46c5c282 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -359,7 +359,8 @@ class VimeoIE(VimeoBaseInfoExtractor): urls = [] # Look for embedded (iframe) Vimeo player for mobj in re.finditer( - r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage): + r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', + webpage): urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url)) PLAIN_EMBED_RE = ( # Look for embedded (swf embed) Vimeo player From 888f8d6ba40f17d8f8a13ca6259e0312d9befc87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Oct 2016 22:23:16 +0700 Subject: [PATCH 013/862] [ChangeLog] Actualize --- ChangeLog | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ChangeLog b/ChangeLog index 7aa0787ca..fb248f9e9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,21 @@ version Extractors ++ [iprima] Detect geo restriction * [facebook] Fix video extraction (#10846) + [commonprotocols] Support direct MMS links (#10838) ++ [generic] Add support for multiple vimeo embeds (#10862) ++ [nzz] Add support for nzz.ch (#4407) ++ [npo] Detect geo restriction ++ [npo] Add support for 2doc.nl (#10842) ++ [lego] Add support for lego.com (#10369) ++ [tonline] Add support for t-online.de (#10376) +* [techtalks] Relax URL regular expression (#10840) +* [youtube:live] Extend URL regular expression (#10839) ++ [theweatherchannel] Add support for weather.com (#7188) ++ [thisoldhouse] Add support for thisoldhouse.com (#10837) ++ [nhl] Add support for wch2016.com (#10833) +* [pornoxo] Use JWPlatform to improve metadata extraction version 2016.10.02 From dd4291f72984037286dfd1800fdc07204b0b621a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Oct 2016 22:25:30 +0700 Subject: [PATCH 014/862] release 2016.10.07 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 5 +++++ youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index e813e4c59..15a93776b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.02** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.07** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.10.02 +[debug] youtube-dl version 2016.10.07 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index fb248f9e9..7e9b2b873 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.10.07 Extractors + [iprima] Detect geo restriction diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 828ed0ba9..5bbef0c41 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -364,6 +364,7 @@ - **Le**: 乐视网 - **Learnr** - **Lecture2Go** + - **LEGO** - **Lemonde** - **LePlaylist** - **LetvCloud**: 乐视云 @@ -507,6 +508,7 @@ - **Nuvid** - **NYTimes** - **NYTimesArticle** + - **NZZ** - **ocw.mit.edu** - **OdaTV** - **Odnoklassniki** @@ -692,6 +694,7 @@ - **SWRMediathek** - **Syfy** - **SztvHu** + - **t-online.de** - **Tagesschau** - **tagesschau:player** - **Tass** @@ -721,8 +724,10 @@ - **TheScene** - **TheSixtyOne** - **TheStar** + - **TheWeatherChannel** - **ThisAmericanLife** - **ThisAV** + - **ThisOldHouse** - **tinypic**: tinypic.com videos - **tlc.de** - **TMZ** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 161ba4391..ac0921b7a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.10.02' +__version__ = '2016.10.07' From 1dd58e14d846a64a3c014531b1dc7a377648c73b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 8 Oct 2016 08:33:02 +0100 Subject: [PATCH 015/862] [lego] improve info extraction and bypass geo restriction(closes #10872) --- youtube_dl/extractor/lego.py | 88 ++++++++++++++++++++++++++---------- 1 file changed, 65 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/lego.py b/youtube_dl/extractor/lego.py index 5be7d622c..d3bca6435 100644 --- a/youtube_dl/extractor/lego.py +++ b/youtube_dl/extractor/lego.py @@ -1,45 +1,86 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_str from ..utils import ( unescapeHTML, - int_or_none, + parse_duration, + get_element_by_class, ) class LEGOIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?lego\.com/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P[0-9a-f]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?lego\.com/(?P[^/]+)/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P[0-9a-f]+)' + _TESTS = [{ 'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1', 'md5': 'f34468f176cfd76488767fc162c405fa', 'info_dict': { 'id': '55492d823b1b4d5e985787fa8c2973b1', 'ext': 'mp4', 'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi', - } - } + 'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi', + }, + }, { + # geo-restricted but the contentUrl contain a valid url + 'url': 'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399', + 'md5': '4c3fec48a12e40c6e5995abc3d36cc2e', + 'info_dict': { + 'id': '13bdc2299ab24d9685701a915b3d71e7', + 'ext': 'mp4', + 'title': 'Aflevering 20 - Helden van het koninkrijk', + 'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941', + }, + }, { + # special characters in title + 'url': 'http://www.lego.com/en-us/starwars/videos/lego-star-wars-force-surprise-9685ee9d12e84ff38e84b4e3d0db533d', + 'info_dict': { + 'id': '9685ee9d12e84ff38e84b4e3d0db533d', + 'ext': 'mp4', + 'title': 'Force Surprise – LEGO® Star Wars™ Microfighters', + 'description': 'md5:9c673c96ce6f6271b88563fe9dc56de3', + }, + 'params': { + 'skip_download': True, + }, + }] _BITRATES = [256, 512, 1024, 1536, 2560] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage( - 'http://www.lego.com/en-US/mediaplayer/video/' + video_id, video_id) - title = self._search_regex(r'(.+?)', webpage, 'title') - video_data = self._parse_json(unescapeHTML(self._search_regex( - r"video='([^']+)'", webpage, 'video data')), video_id) - progressive_base = self._search_regex( - r'data-video-progressive-url="([^"]+)"', - webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/') - streaming_base = self._search_regex( - r'data-video-streaming-url="([^"]+)"', - webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/') - item_id = video_data['ItemId'] + locale, video_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, video_id) + title = get_element_by_class('video-header', webpage).strip() + progressive_base = 'https://lc-mediaplayerns-live-s.legocdn.com/' + streaming_base = 'http://legoprod-f.akamaihd.net/' + content_url = self._html_search_meta('contentUrl', webpage) + path = self._search_regex( + r'(?:https?:)?//[^/]+/(?:[iz]/s/)?public/(.+)_[0-9,]+\.(?:mp4|webm)', + content_url, 'video path', default=None) + if not path: + player_url = self._proto_relative_url(self._search_regex( + r']+src="((?:https?)?//(?:www\.)?lego\.com/[^/]+/mediaplayer/video/[^"]+)', + webpage, 'player url', default=None)) + if not player_url: + base_url = self._proto_relative_url(self._search_regex( + r'data-baseurl="([^"]+)"', webpage, 'base url', + default='http://www.lego.com/%s/mediaplayer/video/' % locale)) + player_url = base_url + video_id + player_webpage = self._download_webpage(player_url, video_id) + video_data = self._parse_json(unescapeHTML(self._search_regex( + r"video='([^']+)'", player_webpage, 'video data')), video_id) + progressive_base = self._search_regex( + r'data-video-progressive-url="([^"]+)"', + player_webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/') + streaming_base = self._search_regex( + r'data-video-streaming-url="([^"]+)"', + player_webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/') + item_id = video_data['ItemId'] - net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]]) - base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])]) - path = '/'.join([net_storage_path, base_path]) + net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]]) + base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])]) + path = '/'.join([net_storage_path, base_path]) streaming_path = ','.join(map(lambda bitrate: compat_str(bitrate), self._BITRATES)) formats = self._extract_akamai_formats( @@ -80,7 +121,8 @@ class LEGOIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'thumbnail': video_data.get('CoverImageUrl'), - 'duration': int_or_none(video_data.get('Length')), + 'description': self._html_search_meta('description', webpage), + 'thumbnail': self._html_search_meta('thumbnail', webpage), + 'duration': parse_duration(self._html_search_meta('duration', webpage)), 'formats': formats, } From 3adb9d119e049d2bbc92fe2b56f1a22f4a664892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Fri, 30 Sep 2016 19:54:12 +0200 Subject: [PATCH 016/862] [reverbnation] Modernize --- youtube_dl/extractor/reverbnation.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py index 3c6725aeb..52f18e231 100644 --- a/youtube_dl/extractor/reverbnation.py +++ b/youtube_dl/extractor/reverbnation.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import str_or_none @@ -10,20 +8,19 @@ class ReverbNationIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P\d+).*?$' _TESTS = [{ 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', - 'md5': '3da12ebca28c67c111a7f8b262d3f7a7', + 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645', 'info_dict': { 'id': '16965047', 'ext': 'mp3', 'title': 'MONA LISA', 'uploader': 'ALKILADOS', 'uploader_id': '216429', - 'thumbnail': 're:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$' + 'thumbnail': 're:^https?://.*\.jpg', }, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - song_id = mobj.group('id') + song_id = self._match_id(url) api_res = self._download_json( 'https://api.reverbnation.com/song/%s' % song_id, @@ -31,14 +28,24 @@ class ReverbNationIE(InfoExtractor): note='Downloading information of song %s' % song_id ) + thumbnails = [] + if api_res.get('image'): + thumbnails.append({ + 'url': api_res.get('image'), + }) + if api_res.get('thumbnail'): + thumbnails.append({ + 'url': api_res.get('thumbnail'), + 'preference': -2, + }) + return { 'id': song_id, - 'title': api_res.get('name'), - 'url': api_res.get('url'), + 'title': api_res['name'], + 'url': api_res['url'], 'uploader': api_res.get('artist', {}).get('name'), 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), - 'thumbnail': self._proto_relative_url( - api_res.get('image', api_res.get('thumbnail'))), + 'thumbnails': thumbnails, 'ext': 'mp3', 'vcodec': 'none', } From f68901e50a9286aa4d82348cac0e85e26359c81c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Oct 2016 01:02:35 +0700 Subject: [PATCH 017/862] [reverbnation] Eliminate code duplication in thumbnails extraction --- youtube_dl/extractor/reverbnation.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py index 52f18e231..4875009e5 100644 --- a/youtube_dl/extractor/reverbnation.py +++ b/youtube_dl/extractor/reverbnation.py @@ -1,7 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import str_or_none +from ..utils import ( + qualities, + str_or_none, +) class ReverbNationIE(InfoExtractor): @@ -28,16 +31,15 @@ class ReverbNationIE(InfoExtractor): note='Downloading information of song %s' % song_id ) + THUMBNAILS = ('thumbnail', 'image') + quality = qualities(THUMBNAILS) thumbnails = [] - if api_res.get('image'): - thumbnails.append({ - 'url': api_res.get('image'), - }) - if api_res.get('thumbnail'): - thumbnails.append({ - 'url': api_res.get('thumbnail'), - 'preference': -2, - }) + for thumb_key in THUMBNAILS: + if api_res.get(thumb_key): + thumbnails.append({ + 'url': api_res[thumb_key], + 'preference': quality(thumb_key) + }) return { 'id': song_id, From 2b51dac1f9750f6eb4988f3c23b0e8f618136b6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Sat, 1 Oct 2016 13:57:18 +0200 Subject: [PATCH 018/862] [slutload] Fix test and simplify --- youtube_dl/extractor/slutload.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py index 7efb29f65..18cc7721e 100644 --- a/youtube_dl/extractor/slutload.py +++ b/youtube_dl/extractor/slutload.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor @@ -9,7 +7,7 @@ class SlutloadIE(InfoExtractor): _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P[^/]+)/?$' _TEST = { 'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', - 'md5': '0cf531ae8006b530bd9df947a6a0df77', + 'md5': '868309628ba00fd488cf516a113fd717', 'info_dict': { 'id': 'TD73btpBqSxc', 'ext': 'mp4', @@ -20,9 +18,7 @@ class SlutloadIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_title = self._html_search_regex(r'

([^<]+)', From 8204c733523675d505a8c726ec65b65e15485ce1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 9 Oct 2016 18:22:55 +0800 Subject: [PATCH 019/862] [Makefile] Fix for GNU make < 4 (closes #9387) Shell assignment operator in BSD make != is ported to GNU make in version 4.0, so 3.x doesn't work. I choose to drop BSD make support as installing GNU make on *BSD systems is easier than installing newer GNU make. --- ChangeLog | 6 ++++++ Makefile | 4 ++-- README.md | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7e9b2b873..3d3473a4b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Core +* [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) + + version 2016.10.07 Extractors diff --git a/Makefile b/Makefile index a2763a664..8d66e48c9 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ SHAREDIR ?= $(PREFIX)/share PYTHON ?= /usr/bin/env python # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local -SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi +SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish install -d $(DESTDIR)$(BINDIR) @@ -90,7 +90,7 @@ fish-completion: youtube-dl.fish lazy-extractors: youtube_dl/extractor/lazy_extractors.py -_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py' +_EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py') youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) $(PYTHON) devscripts/make_lazy_extractors.py $@ diff --git a/README.md b/README.md index 4debe15fe..1cb44b2cf 100644 --- a/README.md +++ b/README.md @@ -923,7 +923,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file If you want to create a build of youtube-dl yourself, you'll need * python -* make (both GNU make and BSD make are supported) +* make (only GNU make is supported) * pandoc * zip * nosetests From b0082629a9cf65796d503786c45c144d992010e7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 9 Oct 2016 18:42:15 +0800 Subject: [PATCH 020/862] =?UTF-8?q?[nextmedia]=20Support=20action=20news?= =?UTF-8?q?=20(=E5=8B=95=E6=96=B0=E8=81=9E)=20on=20Apple=20Daily?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog | 3 +++ youtube_dl/extractor/nextmedia.py | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 3d3473a4b..f74c6b5a4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,9 @@ version Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) +Extractors ++ [nextmedia] Recognize action news on AppleDaily + version 2016.10.07 diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index a08e48c4b..dee9056d3 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -93,7 +93,7 @@ class NextMediaActionNewsIE(NextMediaIE): class AppleDailyIE(NextMediaIE): IE_DESC = '臺灣蘋果日報' - _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' + _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews|actionnews)/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' _TESTS = [{ 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694', 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d', @@ -154,6 +154,9 @@ class AppleDailyIE(NextMediaIE): 'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748', 'upload_date': '20140417', }, + }, { + 'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/', + 'only_matching': True, }] _URL_PATTERN = r'\{url: \'(.+)\'\}' From 65f4c1de3d442a49367597a80687fddcf3d142a2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 9 Oct 2016 18:58:15 +0800 Subject: [PATCH 021/862] [allocine] Fix extraction (closes #10860) I change the URL of the third test case, because now the original URL does not contain a video anymore, and there's no easy to get the real URL from the /film/ one. --- ChangeLog | 1 + youtube_dl/extractor/allocine.py | 57 ++++++++++++-------------------- 2 files changed, 22 insertions(+), 36 deletions(-) diff --git a/ChangeLog b/ChangeLog index f74c6b5a4..6c6053a2a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [allocine] Fix extraction (#10860) + [nextmedia] Recognize action news on AppleDaily diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 7d280d871..b292ffdd9 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -1,29 +1,25 @@ # coding: utf-8 from __future__ import unicode_literals -import re -import json - from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( qualities, - unescapeHTML, - xpath_element, + url_basename, ) class AllocineIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?Particle|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P[0-9]+)(?:\.html)?' + _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?:article|video|film)/(?:fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P[0-9]+)(?:\.html)?' _TESTS = [{ 'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html', 'md5': '0c9fcf59a841f65635fa300ac43d8269', 'info_dict': { 'id': '19546517', + 'display_id': '18635087', 'ext': 'mp4', 'title': 'Astérix - Le Domaine des Dieux Teaser VF', - 'description': 'md5:abcd09ce503c6560512c14ebfdb720d2', + 'description': 'md5:4a754271d9c6f16c72629a8a993ee884', 'thumbnail': 're:http://.*\.jpg', }, }, { @@ -31,19 +27,21 @@ class AllocineIE(InfoExtractor): 'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0', 'info_dict': { 'id': '19540403', + 'display_id': '19540403', 'ext': 'mp4', 'title': 'Planes 2 Bande-annonce VF', 'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway', 'thumbnail': 're:http://.*\.jpg', }, }, { - 'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html', + 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html', 'md5': '101250fb127ef9ca3d73186ff22a47ce', 'info_dict': { 'id': '19544709', + 'display_id': '19544709', 'ext': 'mp4', 'title': 'Dragons 2 - Bande annonce finale VF', - 'description': 'md5:601d15393ac40f249648ef000720e7e3', + 'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a', 'thumbnail': 're:http://.*\.jpg', }, }, { @@ -52,43 +50,30 @@ class AllocineIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - typ = mobj.group('typ') - display_id = mobj.group('id') + display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - if typ == 'film': - video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id') - else: - player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player', default=None) - if player: - player_data = json.loads(player) - video_id = compat_str(player_data['refMedia']) - else: - model = self._search_regex(r'data-model="([^"]+)">', webpage, 'data model') - model_data = self._parse_json(unescapeHTML(model), display_id) - video_id = compat_str(model_data['id']) + model = self._html_search_regex( + r'data-model="([^"]+)"', webpage, 'data model') + model_data = self._parse_json(model, display_id) - xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id) - - video = xpath_element(xml, './/AcVisionVideo').attrib quality = qualities(['ld', 'md', 'hd']) formats = [] - for k, v in video.items(): - if re.match(r'.+_path', k): - format_id = k.split('_')[0] - formats.append({ - 'format_id': format_id, - 'quality': quality(format_id), - 'url': v, - }) + for video_url in model_data['sources'].values(): + video_id, format_id = url_basename(video_url).split('_')[:2] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': video_url, + }) self._sort_formats(formats) return { 'id': video_id, - 'title': video['videoTitle'], + 'display_id': display_id, + 'title': model_data['title'], 'thumbnail': self._og_search_thumbnail(webpage), 'formats': formats, 'description': self._og_search_description(webpage), From 176006a1202cc6ef3d0a768ace41f97516c76c6d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 9 Oct 2016 19:41:44 +0800 Subject: [PATCH 022/862] [allocine] Fix for /video/ videos (closes #10860) --- youtube_dl/extractor/allocine.py | 58 ++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index b292ffdd9..517b06def 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + remove_end, qualities, url_basename, ) @@ -46,7 +47,14 @@ class AllocineIE(InfoExtractor): }, }, { 'url': 'http://www.allocine.fr/video/video-19550147/', - 'only_matching': True, + 'md5': '3566c0668c0235e2d224fd8edb389f67', + 'info_dict': { + 'id': '19550147', + 'ext': 'mp4', + 'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger', + 'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354', + 'thumbnail': 're:http://.*\.jpg', + }, }] def _real_extract(self, url): @@ -54,26 +62,48 @@ class AllocineIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - model = self._html_search_regex( - r'data-model="([^"]+)"', webpage, 'data model') - model_data = self._parse_json(model, display_id) - + formats = [] quality = qualities(['ld', 'md', 'hd']) - formats = [] - for video_url in model_data['sources'].values(): - video_id, format_id = url_basename(video_url).split('_')[:2] - formats.append({ - 'format_id': format_id, - 'quality': quality(format_id), - 'url': video_url, - }) + model = self._html_search_regex( + r'data-model="([^"]+)"', webpage, 'data model', default=None) + if model: + model_data = self._parse_json(model, display_id) + + for video_url in model_data['sources'].values(): + video_id, format_id = url_basename(video_url).split('_')[:2] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': video_url, + }) + + title = model_data['title'] + else: + video_id = display_id + media_data = self._download_json( + 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id) + for key, value in media_data['video'].items(): + if not key.endswith('Path'): + continue + + format_id = key[:-len('Path')] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': value, + }) + + title = remove_end(self._html_search_regex( + r'(?s)(.+?)', webpage, 'title' + ).strip(), ' - AlloCiné') + self._sort_formats(formats) return { 'id': video_id, 'display_id': display_id, - 'title': model_data['title'], + 'title': title, 'thumbnail': self._og_search_thumbnail(webpage), 'formats': formats, 'description': self._og_search_description(webpage), From 71cdcb23316b55baf9330741ede4c77d08e4d77f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 12:30:35 +0800 Subject: [PATCH 023/862] [hbo] Support episode pages (closes #10892) --- ChangeLog | 1 + youtube_dl/extractor/hbo.py | 63 ++++++++++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 14 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6c6053a2a..d49682c8b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors ++ [hbo] Add support for episode pages (#10892) * [allocine] Fix extraction (#10860) + [nextmedia] Recognize action news on AppleDaily diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py index dad0f3994..3606d64fd 100644 --- a/youtube_dl/extractor/hbo.py +++ b/youtube_dl/extractor/hbo.py @@ -12,17 +12,7 @@ from ..utils import ( ) -class HBOIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P[0-9]+)' - _TEST = { - 'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839', - 'md5': '1c33253f0c7782142c993c0ba62a8753', - 'info_dict': { - 'id': '1437839', - 'ext': 'mp4', - 'title': 'Ep. 64 Clip: Encryption', - } - } +class HBOBaseIE(InfoExtractor): _FORMATS_INFO = { '1920': { 'width': 1280, @@ -50,8 +40,7 @@ class HBOIE(InfoExtractor): }, } - def _real_extract(self, url): - video_id = self._match_id(url) + def _extract_from_id(self, video_id): video_data = self._download_xml( 'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id) title = xpath_text(video_data, 'title', 'title', True) @@ -116,7 +105,53 @@ class HBOIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'duration': parse_duration(xpath_element(video_data, 'duration/tv14')), + 'duration': parse_duration(xpath_text(video_data, 'duration/tv14')), 'formats': formats, 'thumbnails': thumbnails, } + + +class HBOIE(HBOBaseIE): + _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P[0-9]+)' + _TEST = { + 'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839', + 'md5': '1c33253f0c7782142c993c0ba62a8753', + 'info_dict': { + 'id': '1437839', + 'ext': 'mp4', + 'title': 'Ep. 64 Clip: Encryption', + 'thumbnail': 're:https?://.*\.jpg$', + 'duration': 1072, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + return self._extract_from_id(video_id) + + +class HBOEpisodeIE(HBOBaseIE): + _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P[0-9a-z-]+)\.html' + + _TESTS = [{ + 'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true', + 'md5': '689132b253cc0ab7434237fc3a293210', + 'info_dict': { + 'id': '1439518', + 'ext': 'mp4', + 'title': 'Ep. 52: Inside the Episode', + 'thumbnail': 're:https?://.*\.jpg$', + 'duration': 240, + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + r'(?P[\'"])videoId(?P=q1)\s*:\s*(?P[\'"])(?P\d+)(?P=q2)', + webpage, 'video ID', group='video_id') + + return self._extract_from_id(video_id) From 27b8d2ee9535e19cdaed69de7a08ba0e026700e0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 12:41:30 +0800 Subject: [PATCH 024/862] [hbo] Add display_id and another test (#10892) --- youtube_dl/extractor/hbo.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py index 3606d64fd..cbf774377 100644 --- a/youtube_dl/extractor/hbo.py +++ b/youtube_dl/extractor/hbo.py @@ -138,11 +138,15 @@ class HBOEpisodeIE(HBOBaseIE): 'md5': '689132b253cc0ab7434237fc3a293210', 'info_dict': { 'id': '1439518', + 'display_id': 'ep-52-inside-the-episode', 'ext': 'mp4', 'title': 'Ep. 52: Inside the Episode', 'thumbnail': 're:https?://.*\.jpg$', 'duration': 240, }, + }, { + 'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true', + 'only_matching': True, }] def _real_extract(self, url): @@ -154,4 +158,7 @@ class HBOEpisodeIE(HBOBaseIE): r'(?P[\'"])videoId(?P=q1)\s*:\s*(?P[\'"])(?P\d+)(?P=q2)', webpage, 'video ID', group='video_id') - return self._extract_from_id(video_id) + info_dict = self._extract_from_id(video_id) + info_dict['display_id'] = display_id + + return info_dict From f165ca70eb4f7911949278e17751092a3cc8619f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 12:53:27 +0800 Subject: [PATCH 025/862] [abc.net.au:iview] Fix for non-series videos (closes #10895) --- ChangeLog | 1 + youtube_dl/extractor/abc.py | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index d49682c8b..26f01790a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [abc.net.au:iview] Fix for standalone (non series) videos (#10895) + [hbo] Add support for episode pages (#10892) * [allocine] Fix extraction (#10860) + [nextmedia] Recognize action news on AppleDaily diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 465249bbf..0247cabf9 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -102,16 +102,16 @@ class ABCIViewIE(InfoExtractor): # ABC iview programs are normally available for 14 days only. _TESTS = [{ - 'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00', - 'md5': '979d10b2939101f0d27a06b79edad536', + 'url': 'http://iview.abc.net.au/programs/diaries-of-a-broken-mind/ZX9735A001S00', + 'md5': 'cde42d728b3b7c2b32b1b94b4a548afc', 'info_dict': { - 'id': 'FA1505V024S00', + 'id': 'ZX9735A001S00', 'ext': 'mp4', - 'title': 'Series 27 Ep 24', - 'description': 'md5:b28baeae7504d1148e1d2f0e3ed3c15d', - 'upload_date': '20160820', - 'uploader_id': 'abc1', - 'timestamp': 1471719600, + 'title': 'Diaries Of A Broken Mind', + 'description': 'md5:7de3903874b7a1be279fe6b68718fc9e', + 'upload_date': '20161010', + 'uploader_id': 'abc2', + 'timestamp': 1476064920, }, 'skip': 'Video gone', }] @@ -121,7 +121,7 @@ class ABCIViewIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_params = self._parse_json(self._search_regex( r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id) - title = video_params['title'] + title = video_params.get('title') or video_params['seriesTitle'] stream = next(s for s in video_params['playlist'] if s.get('type') == 'program') formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id) @@ -144,8 +144,8 @@ class ABCIViewIE(InfoExtractor): 'timestamp': parse_iso8601(video_params.get('pubDate'), ' '), 'series': video_params.get('seriesTitle'), 'series_id': video_params.get('seriesHouseNumber') or video_id[:7], - 'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage)), - 'episode': self._html_search_meta('episode_title', webpage), + 'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)), + 'episode': self._html_search_meta('episode_title', webpage, default=None), 'uploader_id': video_params.get('channel'), 'formats': formats, 'subtitles': subtitles, From 555787d717985531b3beba566cb976fd3f849aaa Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 17:44:35 +0800 Subject: [PATCH 026/862] [streamable] Add helper for extracting embedded videos --- youtube_dl/extractor/streamable.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/streamable.py b/youtube_dl/extractor/streamable.py index 1c61437a4..56b926448 100644 --- a/youtube_dl/extractor/streamable.py +++ b/youtube_dl/extractor/streamable.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -48,6 +50,15 @@ class StreamableIE(InfoExtractor): } ] + @staticmethod + def _extract_url(webpage): + print(webpage) + mobj = re.search( + r']+src=(?P[\'"])(?P(?:https?:)?//streamable\.com/(?:(?!\1).+))(?P=q1)', + webpage) + if mobj: + return mobj.group('src') + def _real_extract(self, url): video_id = self._match_id(url) From c452e69d3d3e6bbbec298a5d4b032cb502cef0ab Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 17:46:13 +0800 Subject: [PATCH 027/862] [footyroom] Fix extraction and update _TESTS (closes #10810) --- ChangeLog | 1 + youtube_dl/extractor/footyroom.py | 32 ++++++++++++++++++------------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 26f01790a..76c446a6d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [footyroom] Fix extraction (#10810) * [abc.net.au:iview] Fix for standalone (non series) videos (#10895) + [hbo] Add support for episode pages (#10892) * [allocine] Fix extraction (#10860) diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py index d2503ae2e..118325b6d 100644 --- a/youtube_dl/extractor/footyroom.py +++ b/youtube_dl/extractor/footyroom.py @@ -2,25 +2,27 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .streamable import StreamableIE class FootyRoomIE(InfoExtractor): - _VALID_URL = r'https?://footyroom\.com/(?P[^/]+)' + _VALID_URL = r'https?://footyroom\.com/matches/(?P\d+)' _TESTS = [{ - 'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/', + 'url': 'http://footyroom.com/matches/79922154/hull-city-vs-chelsea/review', 'info_dict': { - 'id': 'schalke-04-0-2-real-madrid-2015-02', - 'title': 'Schalke 04 0 – 2 Real Madrid', + 'id': '79922154', + 'title': 'VIDEO Hull City 0 - 2 Chelsea', }, - 'playlist_count': 3, - 'skip': 'Video for this match is not available', + 'playlist_count': 2, + 'add_ie': [StreamableIE.ie_key()], }, { - 'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/', + 'url': 'http://footyroom.com/matches/75817984/georgia-vs-germany/review', 'info_dict': { - 'id': 'georgia-0-2-germany-2015-03', - 'title': 'Georgia 0 – 2 Germany', + 'id': '75817984', + 'title': 'VIDEO Georgia 0 - 2 Germany', }, 'playlist_count': 1, + 'add_ie': ['Playwire'] }] def _real_extract(self, url): @@ -28,9 +30,8 @@ class FootyRoomIE(InfoExtractor): webpage = self._download_webpage(url, playlist_id) - playlist = self._parse_json( - self._search_regex( - r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'), + playlist = self._parse_json(self._search_regex( + r'DataStore\.media\s*=\s*([^;]+)', webpage, 'media data'), playlist_id) playlist_title = self._og_search_title(webpage) @@ -40,11 +41,16 @@ class FootyRoomIE(InfoExtractor): payload = video.get('payload') if not payload: continue - playwire_url = self._search_regex( + playwire_url = self._html_search_regex( r'data-config="([^"]+)"', payload, 'playwire url', default=None) if playwire_url: entries.append(self.url_result(self._proto_relative_url( playwire_url, 'http:'), 'Playwire')) + streamable_url = StreamableIE._extract_url(payload) + if streamable_url: + entries.append(self.url_result( + streamable_url, StreamableIE.ie_key())) + return self.playlist_result(entries, playlist_id, playlist_title) From 3d643f4cec5ded2be9958d5cd0e31176b2074e37 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 17:46:52 +0800 Subject: [PATCH 028/862] [hbo] Add HBOEpisodeIE (#10892) --- youtube_dl/extractor/extractors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5c1d2abfb..08bed8b0c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -348,7 +348,10 @@ from .goshgay import GoshgayIE from .gputechconf import GPUTechConfIE from .groupon import GrouponIE from .hark import HarkIE -from .hbo import HBOIE +from .hbo import ( + HBOIE, + HBOEpisodeIE, +) from .hearthisat import HearThisAtIE from .heise import HeiseIE from .hellporno import HellPornoIE From 55642487f072565bea3b2826b836a1a3159a3807 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 20:50:52 +0800 Subject: [PATCH 029/862] [nhl] Skip invalid m3u8 formats (closes #10713) --- ChangeLog | 1 + youtube_dl/extractor/nhl.py | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 76c446a6d..9a7e7133b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [nhl] Correctly handle invalid formats (#10713) * [footyroom] Fix extraction (#10810) * [abc.net.au:iview] Fix for standalone (non series) videos (#10895) + [hbo] Add support for episode pages (#10892) diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index 26149c88f..62ce800c0 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -274,6 +274,18 @@ class NHLIE(InfoExtractor): 'upload_date': '20160204', 'timestamp': 1454544904, }, + }, { + # Some m3u8 URLs are invalid (https://github.com/rg3/youtube-dl/issues/10713) + 'url': 'https://www.nhl.com/predators/video/poile-laviolette-on-subban-trade/t-277437416/c-44315003', + 'md5': '50b2bb47f405121484dda3ccbea25459', + 'info_dict': { + 'id': '44315003', + 'ext': 'mp4', + 'title': 'Poile, Laviolette on Subban trade', + 'description': 'General manager David Poile and head coach Peter Laviolette share their thoughts on acquiring P.K. Subban from Montreal (06/29/16)', + 'timestamp': 1467242866, + 'upload_date': '20160629', + }, }, { 'url': 'https://www.wch2016.com/video/caneur-best-of-game-2-micd-up/t-281230378/c-44983703', 'only_matching': True, @@ -301,9 +313,11 @@ class NHLIE(InfoExtractor): continue ext = determine_ext(playback_url) if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( + m3u8_formats = self._extract_m3u8_formats( playback_url, video_id, 'mp4', 'm3u8_native', - m3u8_id=playback.get('name', 'hls'), fatal=False)) + m3u8_id=playback.get('name', 'hls'), fatal=False) + self._check_formats(m3u8_formats, video_id) + formats.extend(m3u8_formats) else: height = int_or_none(playback.get('height')) formats.append({ From cea364f70c97dad933fa38698f3c9df1bdb485cf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 12 Oct 2016 01:40:28 +0800 Subject: [PATCH 030/862] [extractor/common] Support HTML media elements without child nodes --- ChangeLog | 1 + youtube_dl/extractor/common.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 9a7e7133b..49488c888 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Core ++ Support HTML media elements without child nodes * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index da192728f..431cef831 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1802,7 +1802,11 @@ class InfoExtractor(object): return is_plain_url, formats entries = [] - for media_tag, media_type, media_content in re.findall(r'(?s)(<(?Pvideo|audio)[^>]*>)(.*?)', webpage): + media_tags = [(media_tag, media_type, '') + for media_tag, media_type + in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)] + media_tags.extend(re.findall(r'(?s)(<(?Pvideo|audio)[^>]*>)(.*?)', webpage)) + for media_tag, media_type, media_content in media_tags: media_info = { 'formats': [], 'subtitles': {}, From 6f20b65e728ee30d9b987a39932a3355501f7f67 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 12 Oct 2016 01:41:41 +0800 Subject: [PATCH 031/862] [test/test_http] Update tests After switching to HTML5 extraction helpers in generic.py, the result info_dict is always a playlist. --- test/test_http.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_http.py b/test/test_http.py index fdc68ccb4..bb0a098e4 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -87,7 +87,7 @@ class TestHTTP(unittest.TestCase): ydl = YoutubeDL({'logger': FakeLogger()}) r = ydl.extract_info('http://localhost:%d/302' % self.port) - self.assertEqual(r['url'], 'http://localhost:%d/vid.mp4' % self.port) + self.assertEqual(r['entries'][0]['url'], 'http://localhost:%d/vid.mp4' % self.port) class TestHTTPS(unittest.TestCase): @@ -111,7 +111,7 @@ class TestHTTPS(unittest.TestCase): ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True}) r = ydl.extract_info('https://localhost:%d/video.html' % self.port) - self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port) + self.assertEqual(r['entries'][0]['url'], 'https://localhost:%d/vid.mp4' % self.port) def _build_proxy_handler(name): From a093cfc78b584a6e5dbc4bbca525f9e40af9522d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 12 Oct 2016 01:48:06 +0800 Subject: [PATCH 032/862] [vimeo:review] Fix extraction (#10900) Now Vimeo Review videos uses React. Thanks @davekaro for analyzing the problem! --- ChangeLog | 1 + youtube_dl/extractor/vimeo.py | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 49488c888..3e16a2cb3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,6 +5,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [vimeo:review] Fix extraction (#10900) * [nhl] Correctly handle invalid formats (#10713) * [footyroom] Fix extraction (#10810) * [abc.net.au:iview] Fix for standalone (non series) videos (#10895) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index a46c5c282..b566241cc 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -837,6 +837,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): 'params': { 'videopassword': 'holygrail', }, + 'skip': 'video gone', }] def _real_initialize(self): @@ -844,9 +845,10 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): def _get_config_url(self, webpage_url, video_id, video_password_verified=False): webpage = self._download_webpage(webpage_url, video_id) - config_url = self._html_search_regex( - r'data-config-url="([^"]+)"', webpage, 'config URL', - default=NO_DEFAULT if video_password_verified else None) + data = self._parse_json(self._search_regex( + r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data', + default=NO_DEFAULT if video_password_verified else '{}'), video_id) + config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl') if config_url is None: self._verify_video_password(webpage_url, video_id, webpage) config_url = self._get_config_url( From 9feb1c97318bbd575af6c2737dfe66412e1c0bb6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 12 Oct 2016 21:45:49 +0800 Subject: [PATCH 033/862] [dailymotion] Fix extraction and update _TESTS Closes #10901 Seems all videos use player V5 syntax now --- ChangeLog | 1 + youtube_dl/extractor/dailymotion.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3e16a2cb3..fd3e8c2fa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,6 +5,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [dailymotion] Fix extraction (#10901) * [vimeo:review] Fix extraction (#10900) * [nhl] Correctly handle invalid formats (#10713) * [footyroom] Fix extraction (#10810) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 62b0747a5..4a3314ea7 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -94,7 +94,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]', 'uploader': 'HotWaves1012', 'age_limit': 18, - } + }, + 'skip': 'video gone', }, # geo-restricted, player v5 { @@ -144,7 +145,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor): player_v5 = self._search_regex( [r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826 r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);', - r'buildPlayer\(({.+?})\);'], + r'buildPlayer\(({.+?})\);', + r'var\s+config\s*=\s*({.+?});'], webpage, 'player v5', default=None) if player_v5: player = self._parse_json(player_v5, video_id) From 591e384552f44fe5d77015d17fa7f71efa66f778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Oct 2016 21:22:12 +0700 Subject: [PATCH 034/862] [streamable] Remove debug output --- youtube_dl/extractor/streamable.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/streamable.py b/youtube_dl/extractor/streamable.py index 56b926448..2c26fa689 100644 --- a/youtube_dl/extractor/streamable.py +++ b/youtube_dl/extractor/streamable.py @@ -52,7 +52,6 @@ class StreamableIE(InfoExtractor): @staticmethod def _extract_url(webpage): - print(webpage) mobj = re.search( r']+src=(?P[\'"])(?P(?:https?:)?//streamable\.com/(?:(?!\1).+))(?P=q1)', webpage) From bcd6276520e67f59b95dffdb280703328cab82de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Oct 2016 21:22:33 +0700 Subject: [PATCH 035/862] [downloader/common] Remove debug output --- youtube_dl/downloader/common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 8482cbd84..3dc144b4e 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -346,7 +346,6 @@ class FileDownloader(object): min_sleep_interval = self.params.get('sleep_interval') if min_sleep_interval: max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) - print(min_sleep_interval, max_sleep_interval) sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) self.to_screen('[download] Sleeping %s seconds...' % sleep_interval) time.sleep(sleep_interval) From 7104ae799c18e36070d91d570d48c55d651cd4b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Oct 2016 21:25:04 +0700 Subject: [PATCH 036/862] [ChangeLog] Actualize --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index fd3e8c2fa..cc526429a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -13,6 +13,7 @@ Extractors + [hbo] Add support for episode pages (#10892) * [allocine] Fix extraction (#10860) + [nextmedia] Recognize action news on AppleDaily +* [lego] Improve info extraction and bypass geo restriction (#10872) version 2016.10.07 From 5c4bfd4da5d532bf8d5aaf1bb37396f7cfbc786b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Oct 2016 21:30:05 +0700 Subject: [PATCH 037/862] release 2016.10.12 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 2 +- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 15a93776b..865817681 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.07** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.12** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.10.07 +[debug] youtube-dl version 2016.10.12 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 95392030e..62acf9abd 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -85,7 +85,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file If you want to create a build of youtube-dl yourself, you'll need * python -* make (both GNU make and BSD make are supported) +* make (only GNU make is supported) * pandoc * zip * nosetests diff --git a/ChangeLog b/ChangeLog index cc526429a..e3a733410 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.10.12 Core + Support HTML media elements without child nodes diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 5bbef0c41..9b540b3df 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -289,6 +289,7 @@ - **Groupon** - **Hark** - **HBO** + - **HBOEpisode** - **HearThisAt** - **Heise** - **HellPorno** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ac0921b7a..44cc18828 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.10.07' +__version__ = '2016.10.12' From 580d41193169d004c94145ef03d5c53f06d5a57c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 14 Oct 2016 00:44:28 +0800 Subject: [PATCH 038/862] [parliamentliveuk] Recognize lower case URLs Closes #10912 Seems parliamentliveuk matches URLs case-insentive. For example this URL also works: http://parliamentlive.tv/EvEnt/Index/3F24936f-130f-40bf-9a5d-b3d6479da6a4 --- ChangeLog | 6 ++++++ youtube_dl/extractor/parliamentliveuk.py | 9 ++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index e3a733410..d2b78a489 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [parliamentliveuk] Lower case URLs are now recognized (#10912) + + version 2016.10.12 Core diff --git a/youtube_dl/extractor/parliamentliveuk.py b/youtube_dl/extractor/parliamentliveuk.py index 874aacc55..ebdab8db9 100644 --- a/youtube_dl/extractor/parliamentliveuk.py +++ b/youtube_dl/extractor/parliamentliveuk.py @@ -6,9 +6,9 @@ from .common import InfoExtractor class ParliamentLiveUKIE(InfoExtractor): IE_NAME = 'parliamentlive.tv' IE_DESC = 'UK parliament videos' - _VALID_URL = r'https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' - _TEST = { + _TESTS = [{ 'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b', 'info_dict': { 'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b', @@ -18,7 +18,10 @@ class ParliamentLiveUKIE(InfoExtractor): 'timestamp': 1422696664, 'upload_date': '20150131', }, - } + }, { + 'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From b7f59a3bf69b5c935be085551d30ce4d0b8a97d4 Mon Sep 17 00:00:00 2001 From: Philip Xu Date: Thu, 13 Oct 2016 21:51:26 -0400 Subject: [PATCH 039/862] [huajiao] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/huajiao.py | 50 ++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 youtube_dl/extractor/huajiao.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 08bed8b0c..75e16af4e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -372,6 +372,7 @@ from .hrti import ( HRTiIE, HRTiPlaylistIE, ) +from .huajiao import HuajiaoIE from .huffpost import HuffPostIE from .hypem import HypemIE from .iconosquare import IconosquareIE diff --git a/youtube_dl/extractor/huajiao.py b/youtube_dl/extractor/huajiao.py new file mode 100644 index 000000000..352b48120 --- /dev/null +++ b/youtube_dl/extractor/huajiao.py @@ -0,0 +1,50 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from ..utils import parse_duration, parse_iso8601 +from .common import InfoExtractor + + +class HuajiaoIE(InfoExtractor): + IE_DESC = '花椒直播' + _VALID_URL = r'https?://(?:www\.)?huajiao\.com/l/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.huajiao.com/l/38941232', + 'md5': 'd08bf9ac98787d24d1e4c0283f2d372d', + 'info_dict': { + 'id': '38941232', + 'ext': 'mp4', + 'title': '#新人求关注#', + 'description': 're:.*', + 'duration': 2424.0, + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1475866459, + 'upload_date': '20161007', + 'uploader': 'Penny_余姿昀', + 'uploader_id': '75206005', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + feed_json = self._search_regex( + r'var\s*feed\s*=\s*({.*})', webpage, 'feed json str') + feed = self._parse_json(feed_json, video_id) + + description = self._html_search_meta( + 'description', webpage, 'description', fatal=False) + + return { + 'id': video_id, + 'title': feed['feed']['formated_title'], + 'description': description, + 'duration': parse_duration(feed['feed']['duration']), + 'thumbnail': feed['feed']['image'], + 'timestamp': parse_iso8601(feed['creatime'], ' '), + 'uploader': feed['author']['nickname'], + 'uploader_id': feed['author']['uid'], + 'formats': self._extract_m3u8_formats( + feed['feed']['m3u8'], video_id, 'mp4', 'm3u8_native'), + } From a5f847314582d4464e587120c6e696399ff121cb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 14 Oct 2016 18:20:01 +0800 Subject: [PATCH 040/862] [cbsinteractive] Fix extraction for cnet.com --- ChangeLog | 1 + youtube_dl/extractor/cbsinteractive.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index d2b78a489..edd547811 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [cbsinteractive] Fix extraction for cnet.com * [parliamentliveuk] Lower case URLs are now recognized (#10912) diff --git a/youtube_dl/extractor/cbsinteractive.py b/youtube_dl/extractor/cbsinteractive.py index 821db20b2..57b18e81d 100644 --- a/youtube_dl/extractor/cbsinteractive.py +++ b/youtube_dl/extractor/cbsinteractive.py @@ -63,7 +63,7 @@ class CBSInteractiveIE(ThePlatformIE): webpage = self._download_webpage(url, display_id) data_json = self._html_search_regex( - r"data-(?:cnet|zdnet)-video(?:-uvp)?-options='([^']+)'", + r"data-(?:cnet|zdnet)-video(?:-uvp(?:js)?)?-options='([^']+)'", webpage, 'data json') data = self._parse_json(data_json, display_id) vdata = data.get('video') or data['videos'][0] From e2004ccaf711ff9aa9c0b647c3d6219093fb6c2a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 14 Oct 2016 20:26:12 +0800 Subject: [PATCH 041/862] [canalplus] Fix video_id and update _TESTS Some tests are gone, and some redirect to different videos --- ChangeLog | 1 + youtube_dl/extractor/canalplus.py | 64 +++++++++++++++---------------- 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/ChangeLog b/ChangeLog index edd547811..32390f227 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [canalplus] Fix extraction for some videos * [cbsinteractive] Fix extraction for cnet.com * [parliamentliveuk] Lower case URLs are now recognized (#10912) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 6dab226af..1c3c41d26 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -6,11 +6,13 @@ import re from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse from ..utils import ( + dict_get, ExtractorError, HEADRequest, - unified_strdate, - qualities, int_or_none, + qualities, + remove_end, + unified_strdate, ) @@ -43,47 +45,46 @@ class CanalplusIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814', - 'md5': '41f438a4904f7664b91b4ed0dec969dc', 'info_dict': { - 'id': '1192814', + 'id': '1405510', + 'display_id': 'pid1830-c-zapping', 'ext': 'mp4', - 'title': "L'Année du Zapping 2014 - L'Année du Zapping 2014", - 'description': "Toute l'année 2014 dans un Zapping exceptionnel !", - 'upload_date': '20150105', + 'title': 'Zapping - 02/07/2016', + 'description': 'Le meilleur de toutes les chaînes, tous les jours', + 'upload_date': '20160702', }, }, { 'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190', 'info_dict': { 'id': '1108190', - 'ext': 'flv', - 'title': 'Le labyrinthe - Boing super ranger', + 'display_id': 'pid1405-le-labyrinthe-boing-super-ranger', + 'ext': 'mp4', + 'title': 'BOING SUPER RANGER - Ep : Le labyrinthe', 'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff', 'upload_date': '20140724', }, 'skip': 'Only works from France', }, { - 'url': 'http://www.d8.tv/d8-docs-mags/pid5198-d8-en-quete-d-actualite.html?vid=1390231', + 'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html', + 'md5': '4b47b12b4ee43002626b97fad8fb1de5', 'info_dict': { - 'id': '1390231', + 'id': '1420213', + 'display_id': 'pid6318-videos-integrales', 'ext': 'mp4', - 'title': "Vacances pas chères : prix discount ou grosses dépenses ? - En quête d'actualité", - 'description': 'md5:edb6cf1cb4a1e807b5dd089e1ac8bfc6', - 'upload_date': '20160512', - }, - 'params': { - 'skip_download': True, + 'title': 'TPMP ! Même le matin - Les 35H de Baba - 14/10/2016', + 'description': 'md5:f96736c1b0ffaa96fd5b9e60ad871799', + 'upload_date': '20161014', }, + 'skip': 'Only works from France', }, { - 'url': 'http://www.itele.fr/chroniques/invite-bruce-toussaint/thierry-solere-nicolas-sarkozy-officialisera-sa-candidature-a-la-primaire-quand-il-le-voudra-167224', + 'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510', 'info_dict': { - 'id': '1398334', + 'id': '1420176', + 'display_id': 'rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510', 'ext': 'mp4', - 'title': "L'invité de Bruce Toussaint du 07/06/2016 - ", - 'description': 'md5:40ac7c9ad0feaeb6f605bad986f61324', - 'upload_date': '20160607', - }, - 'params': { - 'skip_download': True, + 'title': 'L\'invité de Michaël Darmon du 14/10/2016 - ', + 'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.', + 'upload_date': '20161014', }, }, { 'url': 'http://m.canalplus.fr/?vid=1398231', @@ -95,18 +96,17 @@ class CanalplusIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid') site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]] # Beware, some subclasses do not define an id group - display_id = mobj.group('display_id') or video_id + display_id = remove_end(dict_get(mobj.groupdict(), ('display_id', 'id', 'vid')), '.html') - if video_id is None: - webpage = self._download_webpage(url, display_id) - video_id = self._search_regex( - [r']+?videoId=(["\'])(?P\d+)', r'id=["\']canal_video_player(?P\d+)'], - webpage, 'video id', group='id') + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + [r']+?videoId=(["\'])(?P\d+)', + r'id=["\']canal_video_player(?P\d+)'], + webpage, 'video id', group='id') info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id) video_data = self._download_json(info_url, video_id, 'Downloading video JSON') From 146969e05bc2e2774aa96c62030cdb85ca5c7667 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 14 Oct 2016 23:42:11 +0800 Subject: [PATCH 042/862] [videomore] Support