1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2025-01-10 03:28:49 +00:00

Merge remote-tracking branch 'upstream/master'

This commit is contained in:
vallovic 2021-02-20 20:40:33 +00:00
commit fd733f52f5
4 changed files with 158 additions and 30 deletions

View File

@ -1029,6 +1029,7 @@ from .safari import (
SafariApiIE, SafariApiIE,
SafariCourseIE, SafariCourseIE,
) )
from .samplefocus import SampleFocusIE
from .sapo import SapoIE from .sapo import SapoIE
from .savefrom import SaveFromIE from .savefrom import SaveFromIE
from .sbs import SBSIE from .sbs import SBSIE

View File

@ -0,0 +1,100 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
extract_attributes,
get_element_by_attribute,
int_or_none,
)
class SampleFocusIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?samplefocus\.com/samples/(?P<id>[^/?&#]+)'
_TESTS = [{
'url': 'https://samplefocus.com/samples/lil-peep-sad-emo-guitar',
'md5': '48c8d62d60be467293912e0e619a5120',
'info_dict': {
'id': '40316',
'display_id': 'lil-peep-sad-emo-guitar',
'ext': 'mp3',
'title': 'Lil Peep Sad Emo Guitar',
'thumbnail': r're:^https?://.+\.png',
'license': 'Standard License',
'uploader': 'CapsCtrl',
'uploader_id': 'capsctrl',
'like_count': int,
'comment_count': int,
'categories': ['Samples', 'Guitar', 'Electric guitar'],
},
}, {
'url': 'https://samplefocus.com/samples/dababy-style-bass-808',
'only_matching': True
}, {
'url': 'https://samplefocus.com/samples/young-chop-kick',
'only_matching': True
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
sample_id = self._search_regex(
r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)',
webpage, 'sample id', group='id')
title = self._og_search_title(webpage, fatal=False) or self._html_search_regex(
r'<h1>(.+?)</h1>', webpage, 'title')
mp3_url = self._search_regex(
r'<input[^>]+id=(["\'])sample_mp3\1[^>]+value=(["\'])(?P<url>(?:(?!\2).)+)',
webpage, 'mp3', fatal=False, group='url') or extract_attributes(self._search_regex(
r'<meta[^>]+itemprop=(["\'])contentUrl\1[^>]*>',
webpage, 'mp3 url', group=0))['content']
thumbnail = self._og_search_thumbnail(webpage) or self._html_search_regex(
r'<img[^>]+class=(?:["\'])waveform responsive-img[^>]+src=(["\'])(?P<url>(?:(?!\1).)+)',
webpage, 'mp3', fatal=False, group='url')
comments = []
for author_id, author, body in re.findall(r'(?s)<p[^>]+class="comment-author"><a[^>]+href="/users/([^"]+)">([^"]+)</a>.+?<p[^>]+class="comment-body">([^>]+)</p>', webpage):
comments.append({
'author': author,
'author_id': author_id,
'text': body,
})
uploader_id = uploader = None
mobj = re.search(r'>By <a[^>]+href="/users/([^"]+)"[^>]*>([^<]+)', webpage)
if mobj:
uploader_id, uploader = mobj.groups()
breadcrumb = get_element_by_attribute('typeof', 'BreadcrumbList', webpage)
categories = []
if breadcrumb:
for _, name in re.findall(r'<span[^>]+property=(["\'])name\1[^>]*>([^<]+)', breadcrumb):
categories.append(name)
def extract_count(klass):
return int_or_none(self._html_search_regex(
r'<span[^>]+class=(?:["\'])?%s-count[^>]*>(\d+)' % klass,
webpage, klass, fatal=False))
return {
'id': sample_id,
'title': title,
'url': mp3_url,
'display_id': display_id,
'thumbnail': thumbnail,
'uploader': uploader,
'license': self._html_search_regex(
r'<a[^>]+href=(["\'])/license\1[^>]*>(?P<license>[^<]+)<',
webpage, 'license', fatal=False, group='license'),
'uploader_id': uploader_id,
'like_count': extract_count('sample-%s-favorites' % sample_id),
'comment_count': extract_count('comments'),
'comments': comments,
'categories': categories,
}

View File

@ -21,6 +21,7 @@ from ..utils import (
parse_iso8601, parse_iso8601,
sanitized_Request, sanitized_Request,
std_headers, std_headers,
try_get,
) )
@ -30,7 +31,7 @@ class VikiBaseIE(InfoExtractor):
_API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s' _API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'
_APP = '100005a' _APP = '100005a'
_APP_VERSION = '2.2.5.1428709186' _APP_VERSION = '6.0.0'
_APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad' _APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'
_GEO_BYPASS = False _GEO_BYPASS = False
@ -41,7 +42,7 @@ class VikiBaseIE(InfoExtractor):
_ERRORS = { _ERRORS = {
'geo': 'Sorry, this content is not available in your region.', 'geo': 'Sorry, this content is not available in your region.',
'upcoming': 'Sorry, this content is not yet available.', 'upcoming': 'Sorry, this content is not yet available.',
# 'paywall': 'paywall', 'paywall': 'Sorry, this content is only available to Viki Pass Plus subscribers',
} }
def _prepare_call(self, path, timestamp=None, post_data=None): def _prepare_call(self, path, timestamp=None, post_data=None):
@ -62,7 +63,8 @@ class VikiBaseIE(InfoExtractor):
def _call_api(self, path, video_id, note, timestamp=None, post_data=None): def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
resp = self._download_json( resp = self._download_json(
self._prepare_call(path, timestamp, post_data), video_id, note) self._prepare_call(path, timestamp, post_data), video_id, note,
headers={'x-viki-app-ver': self._APP_VERSION})
error = resp.get('error') error = resp.get('error')
if error: if error:
@ -82,11 +84,13 @@ class VikiBaseIE(InfoExtractor):
expected=True) expected=True)
def _check_errors(self, data): def _check_errors(self, data):
for reason, status in data.get('blocking', {}).items(): for reason, status in (data.get('blocking') or {}).items():
if status and reason in self._ERRORS: if status and reason in self._ERRORS:
message = self._ERRORS[reason] message = self._ERRORS[reason]
if reason == 'geo': if reason == 'geo':
self.raise_geo_restricted(msg=message) self.raise_geo_restricted(msg=message)
elif reason == 'paywall':
self.raise_login_required(message)
raise ExtractorError('%s said: %s' % ( raise ExtractorError('%s said: %s' % (
self.IE_NAME, message), expected=True) self.IE_NAME, message), expected=True)
@ -131,13 +135,19 @@ class VikiIE(VikiBaseIE):
'info_dict': { 'info_dict': {
'id': '1023585v', 'id': '1023585v',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Heirs Episode 14', 'title': 'Heirs - Episode 14',
'uploader': 'SBS', 'uploader': 'SBS Contents Hub',
'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e', 'timestamp': 1385047627,
'upload_date': '20131121', 'upload_date': '20131121',
'age_limit': 13, 'age_limit': 13,
'duration': 3570,
'episode_number': 14,
},
'params': {
'format': 'bestvideo',
}, },
'skip': 'Blocked in the US', 'skip': 'Blocked in the US',
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
}, { }, {
# clip # clip
'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference', 'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
@ -153,7 +163,8 @@ class VikiIE(VikiBaseIE):
'uploader': 'Arirang TV', 'uploader': 'Arirang TV',
'like_count': int, 'like_count': int,
'age_limit': 0, 'age_limit': 0,
} },
'skip': 'Sorry. There was an error loading this video',
}, { }, {
'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi', 'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
'info_dict': { 'info_dict': {
@ -171,7 +182,7 @@ class VikiIE(VikiBaseIE):
}, { }, {
# episode # episode
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1', 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
'md5': '94e0e34fd58f169f40c184f232356cfe', 'md5': '0a53dc252e6e690feccd756861495a8c',
'info_dict': { 'info_dict': {
'id': '44699v', 'id': '44699v',
'ext': 'mp4', 'ext': 'mp4',
@ -183,6 +194,10 @@ class VikiIE(VikiBaseIE):
'uploader': 'group8', 'uploader': 'group8',
'like_count': int, 'like_count': int,
'age_limit': 13, 'age_limit': 13,
'episode_number': 1,
},
'params': {
'format': 'bestvideo',
}, },
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'], 'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
}, { }, {
@ -209,7 +224,7 @@ class VikiIE(VikiBaseIE):
}, { }, {
# non-English description # non-English description
'url': 'http://www.viki.com/videos/158036v-love-in-magic', 'url': 'http://www.viki.com/videos/158036v-love-in-magic',
'md5': 'adf9e321a0ae5d0aace349efaaff7691', 'md5': '41faaba0de90483fb4848952af7c7d0d',
'info_dict': { 'info_dict': {
'id': '158036v', 'id': '158036v',
'ext': 'mp4', 'ext': 'mp4',
@ -220,6 +235,10 @@ class VikiIE(VikiBaseIE):
'title': 'Love In Magic', 'title': 'Love In Magic',
'age_limit': 13, 'age_limit': 13,
}, },
'params': {
'format': 'bestvideo',
},
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -229,36 +248,33 @@ class VikiIE(VikiBaseIE):
'https://www.viki.com/api/videos/' + video_id, 'https://www.viki.com/api/videos/' + video_id,
video_id, 'Downloading video JSON', headers={ video_id, 'Downloading video JSON', headers={
'x-client-user-agent': std_headers['User-Agent'], 'x-client-user-agent': std_headers['User-Agent'],
'x-viki-app-ver': '4.0.57', 'x-viki-app-ver': '3.0.0',
}) })
video = resp['video'] video = resp['video']
self._check_errors(video) self._check_errors(video)
title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False) title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False)
episode_number = int_or_none(video.get('number'))
if not title: if not title:
title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id title = 'Episode %d' % episode_number if video.get('type') == 'episode' else video.get('id') or video_id
container_titles = video.get('container', {}).get('titles', {}) container_titles = try_get(video, lambda x: x['container']['titles'], dict) or {}
container_title = self.dict_selection(container_titles, 'en') container_title = self.dict_selection(container_titles, 'en')
title = '%s - %s' % (container_title, title) title = '%s - %s' % (container_title, title)
description = self.dict_selection(video.get('descriptions', {}), 'en') description = self.dict_selection(video.get('descriptions', {}), 'en')
duration = int_or_none(video.get('duration')) like_count = int_or_none(try_get(video, lambda x: x['likes']['count']))
timestamp = parse_iso8601(video.get('created_at'))
uploader = video.get('author')
like_count = int_or_none(video.get('likes', {}).get('count'))
age_limit = parse_age_limit(video.get('rating'))
thumbnails = [] thumbnails = []
for thumbnail_id, thumbnail in video.get('images', {}).items(): for thumbnail_id, thumbnail in (video.get('images') or {}).items():
thumbnails.append({ thumbnails.append({
'id': thumbnail_id, 'id': thumbnail_id,
'url': thumbnail.get('url'), 'url': thumbnail.get('url'),
}) })
subtitles = {} subtitles = {}
for subtitle_lang, _ in video.get('subtitle_completions', {}).items(): for subtitle_lang, _ in (video.get('subtitle_completions') or {}).items():
subtitles[subtitle_lang] = [{ subtitles[subtitle_lang] = [{
'ext': subtitles_format, 'ext': subtitles_format,
'url': self._prepare_call( 'url': self._prepare_call(
@ -269,13 +285,15 @@ class VikiIE(VikiBaseIE):
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': description,
'duration': duration, 'duration': int_or_none(video.get('duration')),
'timestamp': timestamp, 'timestamp': parse_iso8601(video.get('created_at')),
'uploader': uploader, 'uploader': video.get('author'),
'uploader_url': video.get('author_url'),
'like_count': like_count, 'like_count': like_count,
'age_limit': age_limit, 'age_limit': parse_age_limit(video.get('rating')),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'subtitles': subtitles, 'subtitles': subtitles,
'episode_number': episode_number,
} }
formats = [] formats = []
@ -360,7 +378,7 @@ class VikiChannelIE(VikiBaseIE):
'info_dict': { 'info_dict': {
'id': '50c', 'id': '50c',
'title': 'Boys Over Flowers', 'title': 'Boys Over Flowers',
'description': 'md5:ecd3cff47967fe193cff37c0bec52790', 'description': 'md5:804ce6e7837e1fd527ad2f25420f4d59',
}, },
'playlist_mincount': 71, 'playlist_mincount': 71,
}, { }, {
@ -371,6 +389,7 @@ class VikiChannelIE(VikiBaseIE):
'description': 'md5:05bf5471385aa8b21c18ad450e350525', 'description': 'md5:05bf5471385aa8b21c18ad450e350525',
}, },
'playlist_count': 127, 'playlist_count': 127,
'skip': 'Page not found',
}, { }, {
'url': 'http://www.viki.com/news/24569c-showbiz-korea', 'url': 'http://www.viki.com/news/24569c-showbiz-korea',
'only_matching': True, 'only_matching': True,

View File

@ -226,10 +226,12 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'is_live': is_live, 'is_live': is_live,
} }
def _extract_original_format(self, url, video_id): def _extract_original_format(self, url, video_id, unlisted_hash=None):
query = {'action': 'load_download_config'}
if unlisted_hash:
query['unlisted_hash'] = unlisted_hash
download_data = self._download_json( download_data = self._download_json(
url, video_id, fatal=False, url, video_id, fatal=False, query=query,
query={'action': 'load_download_config'},
headers={'X-Requested-With': 'XMLHttpRequest'}) headers={'X-Requested-With': 'XMLHttpRequest'})
if download_data: if download_data:
source_file = download_data.get('source_file') source_file = download_data.get('source_file')
@ -509,6 +511,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
{ {
'url': 'https://vimeo.com/160743502/abd0e13fb4', 'url': 'https://vimeo.com/160743502/abd0e13fb4',
'only_matching': True, 'only_matching': True,
},
{
# requires passing unlisted_hash(a52724358e) to load_download_config request
'url': 'https://vimeo.com/392479337/a52724358e',
'only_matching': True,
} }
# https://gettingthingsdone.com/workflowmap/ # https://gettingthingsdone.com/workflowmap/
# vimeo embed with check-password page protected by Referer header # vimeo embed with check-password page protected by Referer header
@ -673,7 +680,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
if config.get('view') == 4: if config.get('view') == 4:
config = self._verify_player_video_password(redirect_url, video_id, headers) config = self._verify_player_video_password(redirect_url, video_id, headers)
vod = config.get('video', {}).get('vod', {}) video = config.get('video') or {}
vod = video.get('vod') or {}
def is_rented(): def is_rented():
if '>You rented this title.<' in webpage: if '>You rented this title.<' in webpage:
@ -733,7 +741,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
formats = [] formats = []
source_format = self._extract_original_format( source_format = self._extract_original_format(
'https://vimeo.com/' + video_id, video_id) 'https://vimeo.com/' + video_id, video_id, video.get('unlisted_hash'))
if source_format: if source_format:
formats.append(source_format) formats.append(source_format)