From b92bb0e02a09930cad3c4f6a406eb503c941af61 Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Fri, 19 Feb 2021 16:00:22 +0100
Subject: [PATCH 1/3] [viki] improve extraction(closes #26522)(closes #28203)

- extract uploader_url and episode_number
- report login required error
- extract 480p formats
- fix API v4 calls
---
 youtube_dl/extractor/viki.py | 69 +++++++++++++++++++++++-------------
 1 file changed, 44 insertions(+), 25 deletions(-)

diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index a311f21ef..2e9cbf148 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -21,6 +21,7 @@ from ..utils import (
     parse_iso8601,
     sanitized_Request,
     std_headers,
+    try_get,
 )
 
 
@@ -30,7 +31,7 @@ class VikiBaseIE(InfoExtractor):
     _API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'
 
     _APP = '100005a'
-    _APP_VERSION = '2.2.5.1428709186'
+    _APP_VERSION = '6.0.0'
     _APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'
 
     _GEO_BYPASS = False
@@ -41,7 +42,7 @@ class VikiBaseIE(InfoExtractor):
     _ERRORS = {
         'geo': 'Sorry, this content is not available in your region.',
         'upcoming': 'Sorry, this content is not yet available.',
-        # 'paywall': 'paywall',
+        'paywall': 'Sorry, this content is only available to Viki Pass Plus subscribers',
     }
 
     def _prepare_call(self, path, timestamp=None, post_data=None):
@@ -62,7 +63,8 @@ class VikiBaseIE(InfoExtractor):
 
     def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
         resp = self._download_json(
-            self._prepare_call(path, timestamp, post_data), video_id, note)
+            self._prepare_call(path, timestamp, post_data), video_id, note,
+            headers={'x-viki-app-ver': self._APP_VERSION})
 
         error = resp.get('error')
         if error:
@@ -82,11 +84,13 @@ class VikiBaseIE(InfoExtractor):
             expected=True)
 
     def _check_errors(self, data):
-        for reason, status in data.get('blocking', {}).items():
+        for reason, status in (data.get('blocking') or {}).items():
             if status and reason in self._ERRORS:
                 message = self._ERRORS[reason]
                 if reason == 'geo':
                     self.raise_geo_restricted(msg=message)
+                elif reason == 'paywall':
+                    self.raise_login_required(message)
                 raise ExtractorError('%s said: %s' % (
                     self.IE_NAME, message), expected=True)
 
@@ -131,13 +135,19 @@ class VikiIE(VikiBaseIE):
         'info_dict': {
             'id': '1023585v',
             'ext': 'mp4',
-            'title': 'Heirs Episode 14',
-            'uploader': 'SBS',
-            'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
+            'title': 'Heirs - Episode 14',
+            'uploader': 'SBS Contents Hub',
+            'timestamp': 1385047627,
             'upload_date': '20131121',
             'age_limit': 13,
+            'duration': 3570,
+            'episode_number': 14,
+        },
+        'params': {
+            'format': 'bestvideo',
         },
         'skip': 'Blocked in the US',
+        'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
     }, {
         # clip
         'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
@@ -153,7 +163,8 @@ class VikiIE(VikiBaseIE):
             'uploader': 'Arirang TV',
             'like_count': int,
             'age_limit': 0,
-        }
+        },
+        'skip': 'Sorry. There was an error loading this video',
     }, {
         'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
         'info_dict': {
@@ -171,7 +182,7 @@ class VikiIE(VikiBaseIE):
     }, {
         # episode
         'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
-        'md5': '94e0e34fd58f169f40c184f232356cfe',
+        'md5': '0a53dc252e6e690feccd756861495a8c',
         'info_dict': {
             'id': '44699v',
             'ext': 'mp4',
@@ -183,6 +194,10 @@ class VikiIE(VikiBaseIE):
             'uploader': 'group8',
             'like_count': int,
             'age_limit': 13,
+            'episode_number': 1,
+        },
+        'params': {
+            'format': 'bestvideo',
         },
         'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
     }, {
@@ -209,7 +224,7 @@ class VikiIE(VikiBaseIE):
     }, {
         # non-English description
         'url': 'http://www.viki.com/videos/158036v-love-in-magic',
-        'md5': 'adf9e321a0ae5d0aace349efaaff7691',
+        'md5': '41faaba0de90483fb4848952af7c7d0d',
         'info_dict': {
             'id': '158036v',
             'ext': 'mp4',
@@ -220,6 +235,10 @@ class VikiIE(VikiBaseIE):
             'title': 'Love In Magic',
             'age_limit': 13,
         },
+        'params': {
+            'format': 'bestvideo',
+        },
+        'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
     }]
 
     def _real_extract(self, url):
@@ -229,36 +248,33 @@ class VikiIE(VikiBaseIE):
             'https://www.viki.com/api/videos/' + video_id,
             video_id, 'Downloading video JSON', headers={
                 'x-client-user-agent': std_headers['User-Agent'],
-                'x-viki-app-ver': '4.0.57',
+                'x-viki-app-ver': '3.0.0',
             })
         video = resp['video']
 
         self._check_errors(video)
 
         title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False)
+        episode_number = int_or_none(video.get('number'))
         if not title:
-            title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
-            container_titles = video.get('container', {}).get('titles', {})
+            title = 'Episode %d' % episode_number if video.get('type') == 'episode' else video.get('id') or video_id
+            container_titles = try_get(video, lambda x: x['container']['titles'], dict) or {}
             container_title = self.dict_selection(container_titles, 'en')
             title = '%s - %s' % (container_title, title)
 
         description = self.dict_selection(video.get('descriptions', {}), 'en')
 
-        duration = int_or_none(video.get('duration'))
-        timestamp = parse_iso8601(video.get('created_at'))
-        uploader = video.get('author')
-        like_count = int_or_none(video.get('likes', {}).get('count'))
-        age_limit = parse_age_limit(video.get('rating'))
+        like_count = int_or_none(try_get(video, lambda x: x['likes']['count']))
 
         thumbnails = []
-        for thumbnail_id, thumbnail in video.get('images', {}).items():
+        for thumbnail_id, thumbnail in (video.get('images') or {}).items():
             thumbnails.append({
                 'id': thumbnail_id,
                 'url': thumbnail.get('url'),
             })
 
         subtitles = {}
-        for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
+        for subtitle_lang, _ in (video.get('subtitle_completions') or {}).items():
             subtitles[subtitle_lang] = [{
                 'ext': subtitles_format,
                 'url': self._prepare_call(
@@ -269,13 +285,15 @@ class VikiIE(VikiBaseIE):
             'id': video_id,
             'title': title,
             'description': description,
-            'duration': duration,
-            'timestamp': timestamp,
-            'uploader': uploader,
+            'duration': int_or_none(video.get('duration')),
+            'timestamp': parse_iso8601(video.get('created_at')),
+            'uploader': video.get('author'),
+            'uploader_url': video.get('author_url'),
             'like_count': like_count,
-            'age_limit': age_limit,
+            'age_limit': parse_age_limit(video.get('rating')),
             'thumbnails': thumbnails,
             'subtitles': subtitles,
+            'episode_number': episode_number,
         }
 
         formats = []
@@ -360,7 +378,7 @@ class VikiChannelIE(VikiBaseIE):
         'info_dict': {
             'id': '50c',
             'title': 'Boys Over Flowers',
-            'description': 'md5:ecd3cff47967fe193cff37c0bec52790',
+            'description': 'md5:804ce6e7837e1fd527ad2f25420f4d59',
         },
         'playlist_mincount': 71,
     }, {
@@ -371,6 +389,7 @@ class VikiChannelIE(VikiBaseIE):
             'description': 'md5:05bf5471385aa8b21c18ad450e350525',
         },
         'playlist_count': 127,
+        'skip': 'Page not found',
     }, {
         'url': 'http://www.viki.com/news/24569c-showbiz-korea',
         'only_matching': True,

From cf2dbec6301177a1fddf72862de05fa912d9869d Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Fri, 19 Feb 2021 21:13:56 +0100
Subject: [PATCH 2/3] [vimeo] add support for unlisted video source format
 extraction

---
 youtube_dl/extractor/vimeo.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 15cd06268..bd2663fe0 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -226,10 +226,12 @@ class VimeoBaseInfoExtractor(InfoExtractor):
             'is_live': is_live,
         }
 
-    def _extract_original_format(self, url, video_id):
+    def _extract_original_format(self, url, video_id, unlisted_hash=None):
+        query = {'action': 'load_download_config'}
+        if unlisted_hash:
+            query['unlisted_hash'] = unlisted_hash
         download_data = self._download_json(
-            url, video_id, fatal=False,
-            query={'action': 'load_download_config'},
+            url, video_id, fatal=False, query=query,
             headers={'X-Requested-With': 'XMLHttpRequest'})
         if download_data:
             source_file = download_data.get('source_file')
@@ -509,6 +511,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
         {
             'url': 'https://vimeo.com/160743502/abd0e13fb4',
             'only_matching': True,
+        },
+        {
+            # requires passing unlisted_hash (a52724358e) with the load_download_config request
+            'url': 'https://vimeo.com/392479337/a52724358e',
+            'only_matching': True,
         }
         # https://gettingthingsdone.com/workflowmap/
         # vimeo embed with check-password page protected by Referer header
@@ -673,7 +680,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
             if config.get('view') == 4:
                 config = self._verify_player_video_password(redirect_url, video_id, headers)
 
-        vod = config.get('video', {}).get('vod', {})
+        video = config.get('video') or {}
+        vod = video.get('vod') or {}
 
         def is_rented():
             if '>You rented this title.<' in webpage:
@@ -733,7 +741,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
         formats = []
 
         source_format = self._extract_original_format(
-            'https://vimeo.com/' + video_id, video_id)
+            'https://vimeo.com/' + video_id, video_id, video.get('unlisted_hash'))
         if source_format:
             formats.append(source_format)
 

From 21e872b19ada61337770160a124c4387d6c77e08 Mon Sep 17 00:00:00 2001
From: Isaac-the-Man <steven97102@gmail.com>
Date: Sun, 10 Jan 2021 10:37:54 -0500
Subject: [PATCH 3/3] [samplefocus] Add new extractor(closes #27763)

---
 youtube_dl/extractor/extractors.py  |   1 +
 youtube_dl/extractor/samplefocus.py | 100 ++++++++++++++++++++++++++++
 2 files changed, 101 insertions(+)
 create mode 100644 youtube_dl/extractor/samplefocus.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 62819ddcf..1a39c25c5 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1029,6 +1029,7 @@ from .safari import (
     SafariApiIE,
     SafariCourseIE,
 )
+from .samplefocus import SampleFocusIE
 from .sapo import SapoIE
 from .savefrom import SaveFromIE
 from .sbs import SBSIE
diff --git a/youtube_dl/extractor/samplefocus.py b/youtube_dl/extractor/samplefocus.py
new file mode 100644
index 000000000..806c3c354
--- /dev/null
+++ b/youtube_dl/extractor/samplefocus.py
@@ -0,0 +1,100 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    extract_attributes,
+    get_element_by_attribute,
+    int_or_none,
+)
+
+
+class SampleFocusIE(InfoExtractor):
+    """Extractor for individual sample pages on samplefocus.com."""
+    _VALID_URL = r'https?://(?:www\.)?samplefocus\.com/samples/(?P<id>[^/?&#]+)'
+    _TESTS = [{
+        'url': 'https://samplefocus.com/samples/lil-peep-sad-emo-guitar',
+        'md5': '48c8d62d60be467293912e0e619a5120',
+        'info_dict': {
+            'id': '40316',
+            'display_id': 'lil-peep-sad-emo-guitar',
+            'ext': 'mp3',
+            'title': 'Lil Peep Sad Emo Guitar',
+            'thumbnail': r're:^https?://.+\.png',
+            'license': 'Standard License',
+            'uploader': 'CapsCtrl',
+            'uploader_id': 'capsctrl',
+            'like_count': int,
+            'comment_count': int,
+            'categories': ['Samples', 'Guitar', 'Electric guitar'],
+        },
+    }, {
+        'url': 'https://samplefocus.com/samples/dababy-style-bass-808',
+        'only_matching': True
+    }, {
+        'url': 'https://samplefocus.com/samples/young-chop-kick',
+        'only_matching': True
+    }]
+
+    def _real_extract(self, url):
+        """Scrape the sample page at url and return its info dict."""
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        sample_id = self._search_regex(
+            r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)',
+            webpage, 'sample id', group='id')
+
+        title = self._og_search_title(webpage, fatal=False) or self._html_search_regex(
+            r'<h1>(.+?)</h1>', webpage, 'title')
+
+        # Prefer <input id="sample_mp3">; else use <meta itemprop="contentUrl">.
+        mp3_url = self._search_regex(
+            r'<input[^>]+id=(["\'])sample_mp3\1[^>]+value=(["\'])(?P<url>(?:(?!\2).)+)',
+            webpage, 'mp3', fatal=False, group='url') or extract_attributes(self._search_regex(
+                r'<meta[^>]+itemprop=(["\'])contentUrl\1[^>]*>',
+                webpage, 'mp3 url', group=0))['content']
+
+        # Fall back to the waveform <img> if the og thumbnail lookup is empty.
+        thumbnail = self._og_search_thumbnail(webpage) or self._html_search_regex(
+            r'<img[^>]+class=(?:["\'])waveform responsive-img[^>]+src=(["\'])(?P<url>(?:(?!\1).)+)',
+            webpage, 'thumbnail', fatal=False, group='url')
+
+        # One entry per comment-author/comment-body pair found in the page.
+        comments = [
+            {'author': author, 'author_id': author_id, 'text': body}
+            for author_id, author, body in re.findall(r'(?s)<p[^>]+class="comment-author"><a[^>]+href="/users/([^"]+)">([^"]+)</a>.+?<p[^>]+class="comment-body">([^>]+)</p>', webpage)]
+
+        uploader_id = uploader = None
+        mobj = re.search(r'>By <a[^>]+href="/users/([^"]+)"[^>]*>([^<]+)', webpage)
+        if mobj:
+            uploader_id, uploader = mobj.groups()
+
+        breadcrumb = get_element_by_attribute('typeof', 'BreadcrumbList', webpage) or ''
+        categories = [name for _, name in re.findall(
+            r'<span[^>]+property=(["\'])name\1[^>]*>([^<]+)', breadcrumb)]
+
+        def extract_count(klass):
+            """Parse the digits in the <span> whose class starts with '<klass>-count'."""
+            return int_or_none(self._html_search_regex(
+                r'<span[^>]+class=(?:["\'])?%s-count[^>]*>(\d+)' % klass,
+                webpage, klass, fatal=False))
+
+        return {
+            'id': sample_id,
+            'title': title,
+            'url': mp3_url,
+            'display_id': display_id,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'license': self._html_search_regex(
+                r'<a[^>]+href=(["\'])/license\1[^>]*>(?P<license>[^<]+)<',
+                webpage, 'license', fatal=False, group='license'),
+            'uploader_id': uploader_id,
+            'like_count': extract_count('sample-%s-favorites' % sample_id),
+            'comment_count': extract_count('comments'),
+            'comments': comments,
+            'categories': categories,
+        }