From d1db6b99a8791ceae1d5397f20533ea7be158f66 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 21 May 2023 15:15:00 +0100 Subject: [PATCH 1/5] [utils] Update traverse_obj --- test/test_utils.py | 137 +++++++++++++++++++++++++------ youtube_dl/utils.py | 196 +++++++++++++++++++++++++++++--------------- 2 files changed, 240 insertions(+), 93 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index b85d397d0..2c5be6cc6 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -81,6 +81,7 @@ from youtube_dl.utils import ( smuggle_url, str_to_int, strip_jsonp, + str_or_none, strip_or_none, subtitles_filename, timeconvert, @@ -1583,6 +1584,11 @@ Line 1 'dict': {}, } + # define a pukka Iterable + def iter_range(stop): + for from_ in range(stop): + yield from_ + # Test base functionality self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str', msg='allow tuple path') @@ -1598,15 +1604,17 @@ Line 1 # Test Ellipsis behavior self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis), - (item for item in _TEST_DATA.values() if item is not None), + (item for item in _TEST_DATA.values() if item not in (None, {})), msg='`...` should give all values except `None`') self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(), msg='`...` selection for dicts should select all values') self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')), ['https://www.example.com/0', 'https://www.example.com/1'], msg='nested `...` queries should work') - self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), range(4), + self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), iter_range(4), msg='`...` query result should be flattened') + self.assertEqual(traverse_obj(iter_range(4), Ellipsis), list(iter_range(4)), + msg='`...` should accept iterables') # Test function as key self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)), @@ -1614,6 +1622,32 @@ Line 1 msg='function as query key should perform a filter based on (key, value)') self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), {'str'}, msg='exceptions in the query function should be caught') + self.assertEqual(traverse_obj(iter_range(4), lambda _, x: x % 2 == 0), [0, 2], + msg='function key should accept iterables') + + # Test set as key (transformation/type, like `expected_type`) + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, set([compat_str.upper]), )), ['STR'], + msg='Function in set should be a transformation') + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, set([compat_str]))), ['str'], + msg='Type in set should be a type filter') + self.assertEqual(traverse_obj(_TEST_DATA, {dict}), _TEST_DATA, + msg='A single set should be wrapped into a path') + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, set([compat_str.upper]))), ['STR'], + msg='Transformation function should not raise') + self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, set([str_or_none]))), + [item for item in map(str_or_none, _TEST_DATA.values()) if item is not None], + msg='Function in set should be a transformation') + + # Test `slice` as a key + _SLICE_DATA = [0, 1, 2, 3, 4] + self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None, + msg='slice on a dictionary should not throw') + self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1], + msg='slice key should apply slice to sequence') + self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2], + msg='slice key should apply slice to sequence') + self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2], + msg='slice key should apply slice to sequence') # Test alternative paths self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str', @@ -1659,15 +1693,23 @@ Line 1 {0: ['https://www.example.com/1', 'https://www.example.com/0']}, msg='triple nesting in dict path should be treated as branches') self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {}, - msg='remove `None` values when dict key') + msg='remove `None` values when top level dict key fails') self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis}, - msg='do not remove `None` values if `default`') - self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {0: {}}, - msg='do not remove empty values when dict key') - self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: {}}, - msg='do not remove empty values when dict key and a default') - self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {0: []}, - msg='if branch in dict key not successful, return `[]`') + msg='use `default` if key fails and `default`') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {}, + msg='remove empty values when dict key') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: Ellipsis}, + msg='use `default` when dict key and a default') + self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {}, + msg='remove empty values when nested dict key fails') + self.assertEqual(traverse_obj(None, {0: 'fail'}), {}, + msg='default to dict if pruned') + self.assertEqual(traverse_obj(None, {0: 'fail'}, default=Ellipsis), {0: Ellipsis}, + msg='default to dict if pruned and default is given') + self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=Ellipsis), {0: {0: Ellipsis}}, + msg='use nested `default` when nested dict key fails and `default`') + self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {}, + msg='remove key if branch in dict key not successful') # Testing default parameter behavior _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []} @@ -1691,20 +1733,56 @@ Line 1 msg='if branched but not successful return `[]`, not `default`') self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [], msg='if branched but object is empty return `[]`, not `default`') + self.assertEqual(traverse_obj(None, Ellipsis), [], + msg='if branched but object is `None` return `[]`, not `default`') + self.assertEqual(traverse_obj({0: None}, (0, Ellipsis)), [], + msg='if branched but state is `None` return `[]`, not `default`') + + branching_paths = [ + ('fail', Ellipsis), + (Ellipsis, 'fail'), + 100 * ('fail',) + (Ellipsis,), + (Ellipsis,) + 100 * ('fail',), + ] + for branching_path in branching_paths: + self.assertEqual(traverse_obj({}, branching_path), [], + msg='if branched but state is `None`, return `[]` (not `default`)') + self.assertEqual(traverse_obj({}, 'fail', branching_path), [], + msg='if branching in last alternative and previous did not match, return `[]` (not `default`)') + self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x', + msg='if branching in last alternative and previous did match, return single value') + self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x', + msg='if branching in first alternative and non-branching path does match, return single value') + self.assertEqual(traverse_obj({}, branching_path, 'fail'), None, + msg='if branching in first alternative and non-branching path does not match, return `default`') # Testing expected_type behavior _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0} - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=compat_str), 'str', - msg='accept matching `expected_type` type') - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), None, - msg='reject non matching `expected_type` type') - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: compat_str(x)), '0', - msg='transform type using type function') - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', - expected_type=lambda _: 1 / 0), None, - msg='wrap expected_type function in try_call') - self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=compat_str), ['str'], - msg='eliminate items that expected_type fails on') + + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=compat_str), + 'str', msg='accept matching `expected_type` type') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), + None, msg='reject non-matching `expected_type` type') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: compat_str(x)), + '0', msg='transform type using type function') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0), + None, msg='wrap expected_type function in try_call') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=compat_str), + ['str'], msg='eliminate items that expected_type fails on') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int), + {0: 100}, msg='type as expected_type should filter dict values') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none), + {0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values') + self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, {int_or_none}), expected_type=int), + 1, msg='expected_type should not filter non-final dict values') + self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int), + {0: {0: 100}}, msg='expected_type should transform deep dict values') + self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(Ellipsis)), + [{0: Ellipsis}, {0: Ellipsis}], msg='expected_type should transform branched dict values') + self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int), + [4], msg='expected_type regression for type matching in tuple branching') + self.assertEqual(traverse_obj(_TEST_DATA, ['data', Ellipsis], expected_type=int), + [], msg='expected_type regression for type matching in dict result') # Test get_all behavior _GET_ALL_DATA = {'key': [0, 1, 2]} @@ -1749,14 +1827,17 @@ Line 1 _traverse_string=True), '.', msg='traverse into converted data if `traverse_string`') self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis), - _traverse_string=True), list('str'), - msg='`...` branching into string should result in list') + _traverse_string=True), 'str', + msg='`...` should result in string (same value) if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)), + _traverse_string=True), 'sr', + msg='`slice` should result in string if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == 's'), + _traverse_string=True), 'str', + msg='function should result in string if `traverse_string`') self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)), _traverse_string=True), ['s', 'r'], - msg='branching into string should result in list') - self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda _, x: x), - _traverse_string=True), list('str'), - msg='function branching into string should result in list') + msg='branching should result in list if `traverse_string`') # Test is_user_input behavior _IS_USER_INPUT_DATA = {'range8': list(range(8))} @@ -1793,6 +1874,8 @@ Line 1 msg='failing str key on a `re.Match` should return `default`') self.assertEqual(traverse_obj(mobj, 8), None, msg='failing int key on a `re.Match` should return `default`') + self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'], + msg='function on a `re.Match` should give group name as well') def test_get_first(self): self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 584581b6a..7334472af 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -4222,10 +4222,16 @@ def multipart_encode(data, boundary=None): return out, content_type -def variadic(x, allowed_types=(compat_str, bytes, dict)): - if not isinstance(allowed_types, tuple) and isinstance(allowed_types, compat_collections_abc.Iterable): +def is_iterable_like(x, allowed_types=compat_collections_abc.Iterable, blocked_types=NO_DEFAULT): + if blocked_types is NO_DEFAULT: + blocked_types = (compat_str, bytes, compat_collections_abc.Mapping) + return isinstance(x, allowed_types) and not isinstance(x, blocked_types) + + +def variadic(x, allowed_types=NO_DEFAULT): + if isinstance(allowed_types, compat_collections_abc.Iterable): allowed_types = tuple(allowed_types) - return x if isinstance(x, compat_collections_abc.Iterable) and not isinstance(x, allowed_types) else (x,) + return x if is_iterable_like(x, blocked_types=allowed_types) else (x,) def dict_get(d, key_or_keys, default=None, skip_false_values=True): @@ -5992,7 +5998,7 @@ def clean_podcast_url(url): def traverse_obj(obj, *paths, **kwargs): """ - Safely traverse nested `dict`s and `Sequence`s + Safely traverse nested `dict`s and `Iterable`s >>> obj = [{}, {"key": "value"}] >>> traverse_obj(obj, (1, "key")) @@ -6000,14 +6006,17 @@ def traverse_obj(obj, *paths, **kwargs): Each of the provided `paths` is tested and the first producing a valid result will be returned. The next path will also be tested if the path branched but no results could be found. - Supported values for traversal are `Mapping`, `Sequence` and `re.Match`. - A value of None is treated as the absence of a value. + Supported values for traversal are `Mapping`, `Iterable` and `re.Match`. + Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded. The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`. The keys in the path can be one of: - `None`: Return the current object. - - `str`/`int`: Return `obj[key]`. For `re.Match, return `obj.group(key)`. + - `set`: Requires the only item in the set to be a type or function, + like `{type}`/`{func}`. If a `type`, returns only values + of this type. If a function, returns `func(obj)`. + - `str`/`int`: Return `obj[key]`. For `re.Match`, return `obj.group(key)`. - `slice`: Branch out and return all values in `obj[key]`. - `Ellipsis`: Branch out and return a list of all values. - `tuple`/`list`: Branch out and return a list of all matching values. @@ -6015,6 +6024,9 @@ def traverse_obj(obj, *paths, **kwargs): - `function`: Branch out and return values filtered by the function. Read as: `[value for key, value in obj if function(key, value)]`. For `Sequence`s, `key` is the index of the value. + For `Iterable`s, `key` is the enumeration count of the value. + For `re.Match`es, `key` is the group number (0 = full match) + as well as additionally any group names, if given. - `dict` Transform the current object and return a matching dict. Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`. @@ -6023,8 +6035,12 @@ def traverse_obj(obj, *paths, **kwargs): @params paths Paths which to traverse by. Keyword arguments: @param default Value to return if the paths do not match. + If the last key in the path is a `dict`, it will apply to each value inside + the dict instead, depth first. Try to avoid if using nested `dict` keys. @param expected_type If a `type`, only accept final values of this type. If any other callable, try to call the function on each result. + If the last key in the path is a `dict`, it will apply to each value inside + the dict instead, recursively. This does respect branching paths. @param get_all If `False`, return the first matching result, otherwise all matching ones. @param casesense If `False`, consider string dictionary keys as case insensitive. @@ -6035,12 +6051,15 @@ def traverse_obj(obj, *paths, **kwargs): @param _traverse_string Whether to traverse into objects as strings. If `True`, any non-compatible object will first be converted into a string and then traversed into. + The return value of that path will be a string instead, + not respecting any further branching. @returns The result of the object traversal. If successful, `get_all=True`, and the path branches at least once, then a list of results is returned instead. A list is always returned if the last path branches and no `default` is given. + If a path ends on a `dict` that result will always be a `dict`. """ # parameter defaults @@ -6054,7 +6073,6 @@ def traverse_obj(obj, *paths, **kwargs): # instant compat str = compat_str - is_sequence = lambda x: isinstance(x, compat_collections_abc.Sequence) and not isinstance(x, (str, bytes)) casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k if isinstance(expected_type, type): @@ -6068,116 +6086,162 @@ def traverse_obj(obj, *paths, **kwargs): for item in it: yield item - def apply_key(key, obj): - if obj is None: - return + def apply_key(key, obj, is_last): + branching = False + + if obj is None and _traverse_string: + if key is Ellipsis or callable(key) or isinstance(key, slice): + branching = True + result = () + else: + result = None elif key is None: - yield obj + result = obj + + elif isinstance(key, set): + assert len(key) == 1, 'Set should only be used to wrap a single item' + item = next(iter(key)) + if isinstance(item, type): + result = obj if isinstance(obj, item) else None + else: + result = try_call(item, args=(obj,)) elif isinstance(key, (list, tuple)): - for branch in key: - _, result = apply_path(obj, branch) - for item in result: - yield item + branching = True + result = from_iterable( + apply_path(obj, branch, is_last)[0] for branch in key) elif key is Ellipsis: - result = [] + branching = True if isinstance(obj, compat_collections_abc.Mapping): result = obj.values() - elif is_sequence(obj): + elif is_iterable_like(obj): result = obj elif isinstance(obj, compat_re_Match): result = obj.groups() elif _traverse_string: + branching = False result = str(obj) - for item in result: - yield item + else: + result = () elif callable(key): - if is_sequence(obj): - iter_obj = enumerate(obj) - elif isinstance(obj, compat_collections_abc.Mapping): + branching = True + if isinstance(obj, compat_collections_abc.Mapping): iter_obj = obj.items() + elif is_iterable_like(obj): + iter_obj = enumerate(obj) elif isinstance(obj, compat_re_Match): - iter_obj = enumerate(itertools.chain([obj.group()], obj.groups())) + iter_obj = itertools.chain( + enumerate(itertools.chain((obj.group(),), obj.groups())), + obj.groupdict().items()) elif _traverse_string: + branching = False iter_obj = enumerate(str(obj)) else: - return - for item in (v for k, v in iter_obj if try_call(key, args=(k, v))): - yield item + iter_obj = () + + result = (v for k, v in iter_obj if try_call(key, args=(k, v))) + if not branching: # string traversal + result = ''.join(result) elif isinstance(key, dict): - iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items()) - yield dict((k, v if v is not None else default) for k, v in iter_obj - if v is not None or default is not NO_DEFAULT) + iter_obj = ((k, _traverse_obj(obj, v, False, is_last)) for k, v in key.items()) + result = dict((k, v if v is not None else default) for k, v in iter_obj + if v is not None or default is not NO_DEFAULT) or None elif isinstance(obj, compat_collections_abc.Mapping): - yield (obj.get(key) if casesense or (key in obj) - else next((v for k, v in obj.items() if casefold(k) == key), None)) + result = (try_call(obj.get, args=(key,)) + if casesense or try_call(obj.__contains__, args=(key,)) + else next((v for k, v in obj.items() if casefold(k) == key), None)) elif isinstance(obj, compat_re_Match): + result = None if isinstance(key, int) or casesense: try: - yield obj.group(key) - return + result = obj.group(key) except IndexError: pass - if not isinstance(key, str): - return - yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None) + elif isinstance(key, str): + result = next((v for k, v in obj.groupdict().items() + if casefold(k) == key), None) else: - if _is_user_input: - key = (int_or_none(key) if ':' not in key - else slice(*map(int_or_none, key.split(':')))) + result = None + if isinstance(key, (int, slice)): + if is_iterable_like(obj, compat_collections_abc.Sequence): + branching = isinstance(key, slice) + try: + result = obj[key] + except IndexError: + pass + elif _traverse_string: + try: + result = str(obj)[key] + except IndexError: + pass - if not isinstance(key, (int, slice)): - return + return branching, result if branching else (result,) - if not is_sequence(obj): - if not _traverse_string: - return - obj = str(obj) + def lazy_last(iterable): + iterator = iter(iterable) + prev = next(iterator, NO_DEFAULT) + if prev is NO_DEFAULT: + return - try: - yield obj[key] - except IndexError: - pass + for item in iterator: + yield False, prev + prev = item - def apply_path(start_obj, path): + yield True, prev + + def apply_path(start_obj, path, test_type): objs = (start_obj,) has_branched = False - for key in variadic(path): - if _is_user_input and key == ':': - key = Ellipsis + key = None + for last, key in lazy_last(variadic(path, (str, bytes, dict, set))): + if _is_user_input and isinstance(key, str): + if key == ':': + key = Ellipsis + elif ':' in key: + key = slice(*map(int_or_none, key.split(':'))) + elif int_or_none(key) is not None: + key = int(key) if not casesense and isinstance(key, str): key = compat_casefold(key) - if key is Ellipsis or isinstance(key, (list, tuple)) or callable(key): - has_branched = True + new_objs = [] + for obj in objs: + branching, results = apply_key(key, obj, last) + has_branched |= branching + new_objs.append(results) - key_func = functools.partial(apply_key, key) - objs = from_iterable(map(key_func, objs)) + objs = from_iterable(new_objs) - return has_branched, objs + if test_type and not isinstance(key, (dict, list, tuple)): + objs = map(type_test, objs) - def _traverse_obj(obj, path, use_list=True): - has_branched, results = apply_path(obj, path) - results = LazyList(x for x in map(type_test, results) if x is not None) + return objs, has_branched, isinstance(key, dict) + + def _traverse_obj(obj, path, allow_empty, test_type): + results, has_branched, is_dict = apply_path(obj, path, test_type) + results = LazyList(x for x in results if x not in (None, {})) if get_all and has_branched: - return results.exhaust() if results or use_list else None + if results: + return results.exhaust() + if allow_empty: + return [] if default is NO_DEFAULT else default + return None - return results[0] if results else None + return results[0] if results else {} if allow_empty and is_dict else None for index, path in enumerate(paths, 1): - use_list = default is NO_DEFAULT and index == len(paths) - result = _traverse_obj(obj, path, use_list) + result = _traverse_obj(obj, path, index == len(paths), True) if result is not None: return result From 779ea03331653af8d9aee7738087d4584b1c57e2 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 21 May 2023 15:32:40 +0100 Subject: [PATCH 2/5] [DPlay] Merge from yt-dlp #1 * remove discoverynetworks.py --- youtube_dl/extractor/discoverynetworks.py | 43 --- youtube_dl/extractor/dplay.py | 363 ++++++++++++++-------- youtube_dl/extractor/extractors.py | 2 +- 3 files changed, 226 insertions(+), 182 deletions(-) delete mode 100644 youtube_dl/extractor/discoverynetworks.py diff --git a/youtube_dl/extractor/discoverynetworks.py b/youtube_dl/extractor/discoverynetworks.py deleted file mode 100644 index c512b95d0..000000000 --- a/youtube_dl/extractor/discoverynetworks.py +++ /dev/null @@ -1,43 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .dplay import DPlayIE - - -class DiscoveryNetworksDeIE(DPlayIE): - _VALID_URL = r'https?://(?:www\.)?(?P(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P[^/]+)/(?:video/)?(?P[^/]+)' - - _TESTS = [{ - 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100', - 'info_dict': { - 'id': '78867', - 'ext': 'mp4', - 'title': 'Die Welt da draußen', - 'description': 'md5:61033c12b73286e409d99a41742ef608', - 'timestamp': 1554069600, - 'upload_date': '20190331', - }, - 'params': { - 'format': 'bestvideo', - 'skip_download': True, - }, - }, { - 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316', - 'only_matching': True, - }, { - 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B', - 'only_matching': True, - }, { - 'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/', - 'only_matching': True, - }] - - def _real_extract(self, url): - domain, programme, alternate_id = re.match(self._VALID_URL, url).groups() - country = 'GB' if domain == 'dplay.co.uk' else 'DE' - realm = 'questuk' if country == 'GB' else domain.replace('.', '') - return self._get_disco_api_info( - url, '%s/%s' % (programme, alternate_id), - 'sonic-eu1-prod.disco-api.com', realm, country) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index bbb199094..1ead0c564 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import json import re +import uuid from .common import InfoExtractor from ..compat import compat_HTTPError @@ -11,13 +12,183 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, - strip_or_none, + join_nonempty, + remove_start, + traverse_obj, + txt_or_none, unified_timestamp, + url_or_none, ) -class DPlayIE(InfoExtractor): +# for the 2 (?) people running Py 2.6 that has no {member} literals +T = lambda x: set((x,)) + + +class DPlayBaseIE(InfoExtractor): _PATH_REGEX = r'/(?P[^/]+/[^/?#]+)' + _auth_token_cache = {} + + def _get_auth(self, disco_base, display_id, realm, needs_device_id=True): + key = (disco_base, realm) + st = self._get_cookies(disco_base).get('st') + token = (st and st.value) or self._auth_token_cache.get(key) + + if not token: + query = {'realm': realm} + if needs_device_id: + query['deviceId'] = uuid.uuid4().hex + token = self._download_json( + disco_base + 'token', display_id, 'Downloading token', + query=query)['data']['attributes']['token'] + + # Save cache only if cookies are not being set + if not self._get_cookies(disco_base).get('st'): + self._auth_token_cache[key] = token + + return 'Bearer {0}'.format(token) + + def _process_errors(self, e, geo_countries=None): + info = self._parse_json(e.cause.read().decode('utf-8'), None, fatal=False) + error = traverse_obj(info, ('errors', 0, T(dict))) or {} + error_code = error.get('code') + if geo_countries is not None and error_code == 'access.denied.geoblocked': + self.raise_geo_restricted(countries=geo_countries) + elif error_code in ('access.denied.missingpackage', 'invalid.token'): + raise ExtractorError( + 'This video is only available for registered users. You may want to use --cookies.', expected=True) + raise ExtractorError(join_nonempty( + 'code', 'detail', delim=' - ', from_dict=error) or 'Unknown error', + expected=True) + + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): + headers['Authorization'] = self._get_auth(disco_base, display_id, realm, False) + + def _download_video_playback_info(self, disco_base, video_id, headers): + streaming = self._download_json( + disco_base + 'playback/videoPlaybackInfo/' + video_id, + video_id, headers=headers)['data']['attributes']['streaming'] + return [fmt for fmt in traverse_obj(streaming, ( + T(lambda x: x.items()), Ellipsis, { + 'type': (0, ), + 'url': (1, 'url', T(url_or_none))}))] + + def _get_disco_api_info(self, url, display_id, disco_host, realm, country, domain=''): + geo_countries = [country.upper()] + self._initialize_geo_bypass({ + 'countries': geo_countries, + }) + disco_base = 'https://%s/' % disco_host + headers = { + 'Referer': url, + } + self._update_disco_api_headers(headers, disco_base, display_id, realm) + try: + video = self._download_json( + disco_base + 'content/videos/' + display_id, display_id, + headers=headers, query={ + 'fields[channel]': 'name', + 'fields[image]': 'height,src,width', + 'fields[show]': 'name', + 'fields[tag]': 'name', + 'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration', + 'include': 'images,primaryChannel,show,tags' + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + if e.cause.code == 400: + self._process_errors(e, geo_countries) + elif e.cause.code == 404: + self._process_errors(e) + raise + video_id = video['data']['id'] + info = video['data']['attributes'] + title = info['name'].strip() + formats = [] + subtitles = {} + try: + streaming = self._download_video_playback_info( + disco_base, video_id, headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + self._process_errors(e, geo_countries) + raise + for format_dict in traverse_obj(streaming, (Ellipsis, {'type': 'type', 'url': ('url', T(url_or_none))})): + format_url = format_dict['url'] + format_id = format_dict['type'] + ext = determine_ext(format_url) + if format_id == 'dash' or ext == 'mpd': + # dash_fmts, dash_subs = self._extract_mpd_formats_and_subtitles( + dash_fmts, dash_subs = self._extract_mpd_formats( + format_url, display_id, mpd_id='dash', fatal=False), {} + formats.extend(dash_fmts) + subtitles = self._merge_subtitles(subtitles, dash_subs) + elif format_id == 'hls' or ext == 'm3u8': + # m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles( + m3u8_fmts, m3u8_subs = self._extract_m3u8_formats( + format_url, display_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False), {} + formats.extend(m3u8_fmts) + subtitles = self._merge_subtitles(subtitles, m3u8_subs) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + }) + self._sort_formats(formats) + + creator = series = None + tags = [] + thumbnails = [] + for e in traverse_obj(video, ('included', lambda _, v: v['attributes'].keys())): + if True: + attributes = e['attributes'] + e_type = e.get('type') + if e_type == 'channel': + creator = attributes.get('name') + elif e_type == 'image': + src = attributes.get('src') + if src: + thumbnails.append({ + 'url': src, + 'width': int_or_none(attributes.get('width')), + 'height': int_or_none(attributes.get('height')), + }) + if e_type == 'show': + series = attributes.get('name') + elif e_type == 'tag': + name = attributes.get('name') + if name: + tags.append(name) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': txt_or_none(info.get('description')), + 'duration': float_or_none(info.get('videoDuration'), 1000), + 'timestamp': unified_timestamp(info.get('publishStart')), + 'series': series, + 'season_number': int_or_none(info.get('seasonNumber')), + 'episode_number': int_or_none(info.get('episodeNumber')), + 'creator': creator, + 'tags': tags, + 'thumbnails': thumbnails, + 'formats': formats, + 'subtitles': subtitles, + 'http_headers': { + 'referer': domain, + }, + } + + @classmethod + def _match_valid_url(cls, url): + return re.match(cls._VALID_URL, url) + + +class DPlayIE(DPlayBaseIE): + # Nordic/Mediterranean/Japanese Discovery: obsolete? _VALID_URL = r'''(?x)https?:// (?P (?:www\.)?(?Pd @@ -27,7 +198,7 @@ class DPlayIE(InfoExtractor): ) )| (?Pes|it)\.dplay\.com - )/[^/]+''' + _PATH_REGEX + )/[^/]+''' + DPlayBaseIE._PATH_REGEX _TESTS = [{ # non geo restricted, via secure api, unsigned download hls URL @@ -154,146 +325,62 @@ class DPlayIE(InfoExtractor): 'only_matching': True, }] - def _process_errors(self, e, geo_countries): - info = self._parse_json(e.cause.read().decode('utf-8'), None) - error = info['errors'][0] - error_code = error.get('code') - if error_code == 'access.denied.geoblocked': - self.raise_geo_restricted(countries=geo_countries) - elif error_code in ('access.denied.missingpackage', 'invalid.token'): - raise ExtractorError( - 'This video is only available for registered users. You may want to use --cookies.', expected=True) - raise ExtractorError(info['errors'][0]['detail'], expected=True) - - def _update_disco_api_headers(self, headers, disco_base, display_id, realm): - headers['Authorization'] = 'Bearer ' + self._download_json( - disco_base + 'token', display_id, 'Downloading token', - query={ - 'realm': realm, - })['data']['attributes']['token'] - - def _download_video_playback_info(self, disco_base, video_id, headers): - streaming = self._download_json( - disco_base + 'playback/videoPlaybackInfo/' + video_id, - video_id, headers=headers)['data']['attributes']['streaming'] - streaming_list = [] - for format_id, format_dict in streaming.items(): - streaming_list.append({ - 'type': format_id, - 'url': format_dict.get('url'), - }) - return streaming_list - - def _get_disco_api_info(self, url, display_id, disco_host, realm, country): - geo_countries = [country.upper()] - self._initialize_geo_bypass({ - 'countries': geo_countries, - }) - disco_base = 'https://%s/' % disco_host - headers = { - 'Referer': url, - } - self._update_disco_api_headers(headers, disco_base, display_id, realm) - try: - video = self._download_json( - disco_base + 'content/videos/' + display_id, display_id, - headers=headers, query={ - 'fields[channel]': 'name', - 'fields[image]': 'height,src,width', - 'fields[show]': 'name', - 'fields[tag]': 'name', - 'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration', - 'include': 'images,primaryChannel,show,tags' - }) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: - self._process_errors(e, geo_countries) - raise - video_id = video['data']['id'] - info = video['data']['attributes'] - title = info['name'].strip() - formats = [] - try: - streaming = self._download_video_playback_info( - disco_base, video_id, headers) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - self._process_errors(e, geo_countries) - raise - for format_dict in streaming: - if not isinstance(format_dict, dict): - continue - format_url = format_dict.get('url') - if not format_url: - continue - format_id = format_dict.get('type') - ext = determine_ext(format_url) - if format_id == 'dash' or ext == 'mpd': - formats.extend(self._extract_mpd_formats( - format_url, display_id, mpd_id='dash', fatal=False)) - elif format_id == 'hls' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, display_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', - fatal=False)) - else: - formats.append({ - 'url': format_url, - 'format_id': format_id, - }) - self._sort_formats(formats) - - creator = series = None - tags = [] - thumbnails = [] - included = video.get('included') or [] - if isinstance(included, list): - for e in included: - attributes = e.get('attributes') - if not attributes: - continue - e_type = e.get('type') - if e_type == 'channel': - creator = attributes.get('name') - elif e_type == 'image': - src = attributes.get('src') - if src: - thumbnails.append({ - 'url': src, - 'width': int_or_none(attributes.get('width')), - 'height': int_or_none(attributes.get('height')), - }) - if e_type == 'show': - series = attributes.get('name') - elif e_type == 'tag': - name = attributes.get('name') - if name: - tags.append(name) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': strip_or_none(info.get('description')), - 'duration': float_or_none(info.get('videoDuration'), 1000), - 'timestamp': unified_timestamp(info.get('publishStart')), - 'series': series, - 'season_number': int_or_none(info.get('seasonNumber')), - 'episode_number': int_or_none(info.get('episodeNumber')), - 'creator': creator, - 'tags': tags, - 'thumbnails': thumbnails, - 'formats': formats, - } - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) + mobj = self._match_valid_url(url) display_id = mobj.group('id') - domain = mobj.group('domain').lstrip('www.') + domain = remove_start(mobj.group('domain'), 'www.') country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country') host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com' return self._get_disco_api_info( - url, display_id, host, 'dplay' + country, country) + url, display_id, host, 'dplay' + country, country, domain) + + +class DiscoveryNetworksDeIE(DPlayBaseIE): + # DE/UK pre-DiscoveryPlus: obsolete? + _VALID_URL = r'https?://(?:www\.)?(?P(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P[^/]+)/(?:video/)?(?P[^/]+)' + + _TESTS = [{ + 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100', + 'info_dict': { + 'id': '78867', + 'ext': 'mp4', + 'title': 'Die Welt da draußen', + 'description': 'md5:61033c12b73286e409d99a41742ef608', + 'timestamp': 1554069600, + 'upload_date': '20190331', + 'creator': 'TLC', + 'season': 'Season 1', + 'series': 'Breaking Amish', + 'episode_number': 1, + 'tags': ['new york', 'großstadt', 'amische', 'landleben', 'modern', 'infos', 'tradition', 'herausforderung'], + 'display_id': 'breaking-amish/die-welt-da-drauen', + 'episode': 'Episode 1', + 'duration': 2625.024, + 'season_number': 1, + 'thumbnail': r're:https://.+\.jpg', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'HTTP 404 Cannot GET /programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100', + }, { + 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316', + 'only_matching': True, + }, { + 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B', + 'only_matching': True, + }, { + 'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/', + 'only_matching': True, + }] + + def _real_extract(self, url): + domain, programme, alternate_id = self._match_valid_url(url).groups() + country = 'GB' if domain == 'dplay.co.uk' else 'DE' + realm = 'dplay' if country == 'GB' else domain.replace('.', '') + return self._get_disco_api_info( + url, '%s/%s' % (programme, alternate_id), + 'sonic-eu1-prod.disco-api.com', realm, country) class DiscoveryPlusIE(DPlayIE): diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3a87f9e33..317ed75b7 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -304,6 +304,7 @@ from .douyutv import ( DouyuTVIE, ) from .dplay import ( + DiscoveryNetworksDeIE, DPlayIE, DiscoveryPlusIE, HGTVDeIE, @@ -324,7 +325,6 @@ from .discoverygo import ( DiscoveryGoIE, DiscoveryGoPlaylistIE, ) -from .discoverynetworks import DiscoveryNetworksDeIE from .discoveryvr import DiscoveryVRIE from .disney import DisneyIE from .dispeak import DigitallySpeakingIE From ec131d42b1e020dfeb075698ea146ea111e9f5b2 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 21 May 2023 16:06:07 +0100 Subject: [PATCH 3/5] [DPlay] Merge from yt-dlp #2 * pull new (but possibly obsolete) extractors from yt-dlp --- youtube_dl/extractor/dplay.py | 540 +++++++++++++++++++++++++++-- youtube_dl/extractor/extractors.py | 21 +- 2 files changed, 534 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 1ead0c564..16881ef2c 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -383,27 +383,35 @@ class DiscoveryNetworksDeIE(DPlayBaseIE): 'sonic-eu1-prod.disco-api.com', realm, country) -class DiscoveryPlusIE(DPlayIE): - _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX +class HGTVDeIE(DPlayBaseIE): + _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX _TESTS = [{ - 'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family', + 'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/', 'info_dict': { - 'id': '1140794', - 'display_id': 'property-brothers-forever-home/food-and-family', + 'id': '151205', + 'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette', 'ext': 'mp4', - 'title': 'Food and Family', - 'description': 'The brothers help a Richmond family expand their single-level home.', - 'duration': 2583.113, - 'timestamp': 1609304400, - 'upload_date': '20201230', + 'title': 'Wer braucht schon eine Toilette', + 'description': 'md5:05b40a27e7aed2c9172de34d459134e2', + 'duration': 1177.024, + 'timestamp': 1595705400, + 'upload_date': '20200725', 'creator': 'HGTV', - 'series': 'Property Brothers: Forever Home', - 'season_number': 1, - 'episode_number': 1, + 'series': 'Tiny House - klein, aber oho', + 'season_number': 3, + 'episode_number': 3, }, 'skip': 'Available for Premium users', }] + def _real_extract(self, url): + display_id = self._match_id(url) + return self._get_disco_api_info( + url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de') + + +class DiscoveryPlusBaseIE(DPlayBaseIE): + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): headers['x-disco-client'] = 'WEB:UNKNOWN:dplus_us:15.0.0' @@ -427,23 +435,23 @@ class DiscoveryPlusIE(DPlayIE): url, display_id, 'us1-prod-direct.discoveryplus.com', 'go', 'us') -class HGTVDeIE(DPlayIE): - _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX +class DiscoveryPlusIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?:(?P\w{2})/)?video(?:/sport)?' + DPlayBaseIE._PATH_REGEX _TESTS = [{ - 'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/', + 'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family', 'info_dict': { - 'id': '151205', - 'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette', + 'id': '1140794', + 'display_id': 'property-brothers-forever-home/food-and-family', 'ext': 'mp4', - 'title': 'Wer braucht schon eine Toilette', - 'description': 'md5:05b40a27e7aed2c9172de34d459134e2', - 'duration': 1177.024, - 'timestamp': 1595705400, - 'upload_date': '20200725', + 'title': 'Food and Family', + 'description': 'The brothers help a Richmond family expand their single-level home.', + 'duration': 2583.113, + 'timestamp': 1609304400, + 'upload_date': '20201230', 'creator': 'HGTV', - 'series': 'Tiny House - klein, aber oho', - 'season_number': 3, - 'episode_number': 3, + 'series': 'Property Brothers: Forever Home', + 'season_number': 1, + 'episode_number': 1, }, 'params': { 'format': 'bestvideo', @@ -454,3 +462,483 @@ class HGTVDeIE(DPlayIE): display_id = self._match_id(url) return self._get_disco_api_info( url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de') + + +class GoDiscoveryIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://go.discovery.com/video/dirty-jobs-discovery-atve-us/rodbuster-galvanizer', + 'info_dict': { + 'id': '4164906', + 'display_id': 'dirty-jobs-discovery-atve-us/rodbuster-galvanizer', + 'ext': 'mp4', + 'title': 'Rodbuster / Galvanizer', + 'description': 'Mike installs rebar with a team of rodbusters, then he galvanizes steel.', + 'season_number': 9, + 'episode_number': 1, + }, + }] + + _PRODUCT = 'dsc' + _DISCO_API_PARAMS = { + 'disco_host': 'us1-prod-direct.go.discovery.com', + 'realm': 'go', + 'country': 'us', + } + + +class TravelChannelIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:watch\.)?travelchannel\.com/video' + DPlayBaseIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://watch.travelchannel.com/video/ghost-adventures-travel-channel/ghost-train-of-ely', + 'info_dict': { + 'id': '2220256', + 'display_id': 'ghost-adventures-travel-channel/ghost-train-of-ely', + 'ext': 'mp4', + 'title': 'Ghost Train of Ely', + 'description': 'The crew investigates the dark history of the Nevada Northern Railway.', + 'season_number': 24, + 'episode_number': 1, + }, + }] + + _PRODUCT = 'trav' + _DISCO_API_PARAMS = { + 'disco_host': 'us1-prod-direct.watch.travelchannel.com', + 'realm': 'go', + 'country': 'us', + } + + +class CookingChannelIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:watch\.)?cookingchanneltv\.com/video' + DPlayBaseIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://watch.cookingchanneltv.com/video/carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634', + 'info_dict': { + 'id': '2348634', + 'display_id': 'carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634', + 'ext': 'mp4', + 'title': 'The Postman Always Brings Rice', + 'description': 'Noah visits the Maui Fair and the Aurora Winter Festival in Vancouver.', + 'season_number': 9, + 'episode_number': 1, + }, + }] + + _PRODUCT = 'cook' + _DISCO_API_PARAMS = { + 'disco_host': 'us1-prod-direct.watch.cookingchanneltv.com', + 'realm': 'go', + 'country': 'us', + } + + +class HGTVUsaIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:watch\.)?hgtv\.com/video' + DPlayBaseIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://watch.hgtv.com/video/home-inspector-joe-hgtv-atve-us/this-mold-house', + 'info_dict': { + 'id': '4289736', + 'display_id': 'home-inspector-joe-hgtv-atve-us/this-mold-house', + 'ext': 'mp4', + 'title': 'This Mold House', + 'description': 'Joe and Noel help take a familys dream home from hazardous to fabulous.', + 'season_number': 1, + 'episode_number': 1, + }, + }] + + _PRODUCT = 'hgtv' + _DISCO_API_PARAMS = { + 'disco_host': 'us1-prod-direct.watch.hgtv.com', + 'realm': 'go', + 'country': 'us', + } + + +class FoodNetworkIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:watch\.)?foodnetwork\.com/video' + DPlayBaseIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://watch.foodnetwork.com/video/kids-baking-championship-food-network/float-like-a-butterfly', + 'info_dict': { + 'id': '4116449', + 'display_id': 'kids-baking-championship-food-network/float-like-a-butterfly', + 'ext': 'mp4', + 'title': 'Float Like a Butterfly', + 'description': 'The 12 kid bakers create colorful carved butterfly cakes.', + 'season_number': 10, + 'episode_number': 1, + }, + }] + + _PRODUCT = 'food' + _DISCO_API_PARAMS = { + 'disco_host': 'us1-prod-direct.watch.foodnetwork.com', + 'realm': 'go', + 'country': 'us', + } + + +class DestinationAmericaIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:www\.)?destinationamerica\.com/video' + DPlayBaseIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://www.destinationamerica.com/video/alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot', + 'info_dict': { + 'id': '4210904', + 'display_id': 'alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot', + 'ext': 'mp4', + 'title': 'Central Alaskas Bigfoot', + 'description': 'A team heads to central Alaska to investigate an aggressive Bigfoot.', + 'season_number': 1, + 'episode_number': 1, + }, + }] + + _PRODUCT = 'dam' + _DISCO_API_PARAMS = { + 'disco_host': 'us1-prod-direct.destinationamerica.com', + 'realm': 'go', + 'country': 'us', + } + + +class InvestigationDiscoveryIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:www\.)?investigationdiscovery\.com/video' + DPlayBaseIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://www.investigationdiscovery.com/video/unmasked-investigation-discovery/the-killer-clown', + 'info_dict': { + 'id': '2139409', + 'display_id': 'unmasked-investigation-discovery/the-killer-clown', + 'ext': 'mp4', + 'title': 'The Killer Clown', + 'description': 'A wealthy Florida woman is fatally shot in the face by a clown at her door.', + 'season_number': 1, + 'episode_number': 1, + }, + }] + + _PRODUCT = 'ids' + _DISCO_API_PARAMS = { + 'disco_host': 'us1-prod-direct.investigationdiscovery.com', + 'realm': 'go', + 'country': 'us', + } + + +class AmHistoryChannelIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:www\.)?ahctv\.com/video' + DPlayBaseIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://www.ahctv.com/video/modern-sniper-ahc/army', + 'info_dict': { + 'id': '2309730', + 'display_id': 'modern-sniper-ahc/army', + 'ext': 'mp4', + 'title': 'Army', + 'description': 'Snipers today face challenges their predecessors couldve only dreamed of.', + 'season_number': 1, + 'episode_number': 1, + }, + }] + + _PRODUCT = 'ahc' + _DISCO_API_PARAMS = { + 'disco_host': 'us1-prod-direct.ahctv.com', + 'realm': 'go', + 'country': 'us', + } + + +class ScienceChannelIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayBaseIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine', + 'info_dict': { + 'id': '2842849', + 'display_id': 'strangest-things-science-atve-us/nazi-mystery-machine', + 'ext': 'mp4', + 'title': 'Nazi Mystery Machine', + 'description': 'Experts investigate the secrets of a revolutionary encryption machine.', + 'season_number': 1, + 'episode_number': 1, + }, + }] + + _PRODUCT = 'sci' + _DISCO_API_PARAMS = { + 'disco_host': 'us1-prod-direct.sciencechannel.com', + 'realm': 'go', + 'country': 'us', + } + + +class DIYNetworkIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:watch\.)?diynetwork\.com/video' + DPlayBaseIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas', + 'info_dict': { + 'id': '2309730', + 'display_id': 'pool-kings-diy-network/bringing-beach-life-to-texas', + 'ext': 'mp4', + 'title': 'Bringing Beach Life to Texas', + 'description': 'The Pool Kings give a family a day at the beach in their own backyard.', + 'season_number': 10, + 'episode_number': 2, + }, + }] + + _PRODUCT = 'diy' + _DISCO_API_PARAMS = { + 'disco_host': 'us1-prod-direct.watch.diynetwork.com', + 'realm': 'go', + 'country': 'us', + } + + +class DiscoveryLifeIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:www\.)?discoverylife\.com/video' + DPlayBaseIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://www.discoverylife.com/video/surviving-death-discovery-life-atve-us/bodily-trauma', + 'info_dict': { + 'id': '2218238', + 'display_id': 'surviving-death-discovery-life-atve-us/bodily-trauma', + 'ext': 'mp4', + 'title': 'Bodily Trauma', + 'description': 'Meet three people who tested the limits of the human body.', + 'season_number': 1, + 'episode_number': 2, + }, + }] + + _PRODUCT = 'dlf' + _DISCO_API_PARAMS = { + 'disco_host': 'us1-prod-direct.discoverylife.com', + 'realm': 'go', + 'country': 'us', + } + + +class AnimalPlanetIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayBaseIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown', + 'info_dict': { + 'id': '3338923', + 'display_id': 'north-woods-law-animal-planet/squirrel-showdown', + 'ext': 'mp4', + 'title': 'Squirrel Showdown', + 'description': 'A woman is suspected of being in possession of flying squirrel kits.', + 'season_number': 16, + 'episode_number': 11, + }, + }] + + _PRODUCT = 'apl' + _DISCO_API_PARAMS = { + 'disco_host': 'us1-prod-direct.animalplanet.com', + 'realm': 'go', + 'country': 'us', + } + + +class TLCIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:go\.)?tlc\.com/video' + DPlayBaseIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://go.tlc.com/video/my-600-lb-life-tlc/melissas-story-part-1', + 'info_dict': { + 'id': '2206540', + 'display_id': 'my-600-lb-life-tlc/melissas-story-part-1', + 'ext': 'mp4', + 'title': 'Melissas Story (Part 1)', + 'description': 'At 650 lbs, Melissa is ready to begin her seven-year weight loss journey.', + 'season_number': 1, + 'episode_number': 1, + }, + }] + + _PRODUCT = 'tlc' + _DISCO_API_PARAMS = { + 'disco_host': 'us1-prod-direct.tlc.com', + 'realm': 'go', + 'country': 'us', + } + + +class MotorTrendIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:watch\.)?motortrend\.com/video' + DPlayBaseIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas', + 'info_dict': { + 'id': '"4859182"', + 'display_id': 'double-dakotas', + 'ext': 'mp4', + 'title': 'Double Dakotas', + 'description': 'Tylers buy-one-get-one Dakota deal has the Wizard pulling double duty.', + 'season_number': 2, + 'episode_number': 3, + }, + }] + + _PRODUCT = 'vel' + _DISCO_API_PARAMS = { + 'disco_host': 'us1-prod-direct.watch.motortrend.com', + 'realm': 'go', + 'country': 'us', + } + + +class MotorTrendOnDemandIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:www\.)?motortrendondemand\.com/detail' + DPlayBaseIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://www.motortrendondemand.com/detail/wheelstanding-dump-truck-stubby-bobs-comeback/37699/784', + 'info_dict': { + 'id': '37699', + 'display_id': 'wheelstanding-dump-truck-stubby-bobs-comeback/37699', + 'ext': 'mp4', + 'title': 'Wheelstanding Dump Truck! Stubby Bob’s Comeback', + 'description': 'md5:996915abe52a1c3dfc83aecea3cce8e7', + 'season_number': 5, + 'episode_number': 52, + 'episode': 'Episode 52', + 'season': 'Season 5', + 'thumbnail': r're:^https?://.+\.jpe?g$', + 'timestamp': 1388534401, + 'duration': 1887.345, + 'creator': 'Originals', + 'series': 'Roadkill', + 'upload_date': '20140101', + 'tags': [], + }, + }] + + _PRODUCT = 'MTOD' + _DISCO_API_PARAMS = { + 'disco_host': 'us1-prod-direct.motortrendondemand.com', + 'realm': 'motortrend', + 'country': 'us', + } + + +class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/videos?' + DPlayBaseIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://www.discoveryplus.in/videos/how-do-they-do-it/fugu-and-more?seasonId=8&type=EPISODE', + 'info_dict': { + 'id': '27104', + 'ext': 'mp4', + 'display_id': 'how-do-they-do-it/fugu-and-more', + 'title': 'Fugu and More', + 'description': 'The Japanese catch, prepare and eat the deadliest fish on the planet.', + 'duration': 1319.32, + 'timestamp': 1582309800, + 'upload_date': '20200221', + 'series': 'How Do They Do It?', + 'season_number': 8, + 'episode_number': 2, + 'creator': 'Discovery Channel', + 'thumbnail': r're:https://.+\.jpeg', + 'episode': 'Episode 2', + 'season': 'Season 8', + 'tags': [], + }, + 'params': { + 'format': 'best/bestvideo', + 'skip_download': True, + }, + 'expected_warnings': [ + 'Unknown MIME type image/jpeg', + ], + }] + + _PRODUCT = 'dplus-india' + _DISCO_API_PARAMS = { + 'disco_host': 'ap2-prod-direct.discoveryplus.in', + 'realm': 'dplusindia', + 'country': 'in', + 'domain': 'https://www.discoveryplus.in/', + } + + +class DiscoveryPlusShowBaseIE(DPlayBaseIE): + + # these must be set in any subclass + # _DOMAIN = NotImplemented + # _BASE_API = NotImplemented + # _REALM = NotImplemented + # _SHOW_STR = NotImplemented + + # these may need to be overridden in any subclass + _PRODUCT = 'dplay-client' + _DISCO_CLIENT_VER = '2.6.0' + _INDEX = 1 + _VIDEO_IE = DPlayIE + + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): + headers.update({ + 'x-disco-client': 'WEB:UNKNOWN:%s:%s' % (self._PRODUCT, self._DISCO_CLIENT_VER), + 'x-disco-params': 'realm=%s' % (realm, ), + 'referer': self._DOMAIN, + 'Authorization': self._get_auth(disco_base, display_id, realm), + }) + + def _entries(self, show_name): + headers = {} + self._update_disco_api_headers(headers, self._BASE_API, None, self._REALM) + show_json = self._download_json( + '{0}cms/routes/{1}/{2}?include=default'.format(self._BASE_API, self._SHOW_STR, show_name), + video_id=show_name, headers=headers)['included'][self._INDEX]['attributes']['component'] + show_id = show_json['mandatoryParams'].split('=')[-1] + season_url = self._BASE_API + 'content/videos?sort=episodeNumber&filter[seasonNumber]={0}&filter[show.id]={1}&page[size]=100&page[number]={2!s}' + for season_id in traverse_obj(show_json, ('filters', 0, 'options', Ellipsis, 'id')): + total_pages, page_num = 1, 0 + while page_num < total_pages: + season_json = self._download_json( + season_url.format(season_id, show_id, page_num + 1), show_name, headers=headers, + note='Downloading season %s JSON metadata%s' % (season_id, ' page %d' % page_num if page_num else '')) + if page_num == 0: + total_pages = traverse_obj(season_json, ('meta', 'totalPages', T(int))) or 1 + for episode in traverse_obj(season_json, ('data', Ellipsis, T(dict))): + video_path = traverse_obj(episode, ('attributes', 'path')) + if video_path: + yield self.url_result( + '{0}videos/{1}'.format(self._DOMAIN, video_path), + ie=self._VIDEO_IE.ie_key(), video_id=episode.get('id') or video_path) + page_num += 1 + + def _real_extract(self, url): + show_name = self._match_valid_url(url).group('show_name') + return self.playlist_result(self._entries(show_name), playlist_id=show_name) + + +class DiscoveryPlusItalyShowIE(DiscoveryPlusShowBaseIE): + _VALID_URL = r'https?://(?:www\.)?discoveryplus\.it/programmi/(?P[^/]+)/?(?:[?#]|$)' + _TESTS = [{ + 'url': 'https://www.discoveryplus.it/programmi/deal-with-it-stai-al-gioco', + 'playlist_mincount': 168, + 'info_dict': { + 'id': 'deal-with-it-stai-al-gioco', + }, + }] + _BASE_API = 'https://disco-api.discoveryplus.it/' + _DOMAIN = 'https://www.discoveryplus.it/' + _REALM = 'dplayit' + _SHOW_STR = 'programmi' + + +class DiscoveryPlusIndiaShowIE(DiscoveryPlusShowBaseIE): + _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/show/(?P[^/]+)/?(?:[?#]|$)' + _TESTS = [{ + 'url': 'https://www.discoveryplus.in/show/how-do-they-do-it', + 'playlist_mincount': 140, + 'info_dict': { + 'id': 'how-do-they-do-it', + }, + }] + + _BASE_API = 'https://ap2-prod-direct.discoveryplus.in/' + _DOMAIN = 'https://www.discoveryplus.in/' + _PRODUCT = 'dplus-india' + _DISCO_CLIENT_VER = 'prod' + _REALM = 'dplusindia' + _SHOW_STR = 'show' + _INDEX = 4 + _VIDEO_IE = DiscoveryPlusIndiaIE diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 317ed75b7..e6c1842bc 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -304,10 +304,29 @@ from .douyutv import ( DouyuTVIE, ) from .dplay import ( + AmHistoryChannelIE, + AnimalPlanetIE, + DiscoveryLifeIE, DiscoveryNetworksDeIE, - DPlayIE, DiscoveryPlusIE, + DiscoveryPlusIndiaIE, + DiscoveryPlusIndiaShowIE, + DiscoveryPlusItalyShowIE, + DIYNetworkIE, + DPlayIE, + CookingChannelIE, + DestinationAmericaIE, + DiscoveryPlusIE, + FoodNetworkIE, + GoDiscoveryIE, HGTVDeIE, + HGTVUsaIE, + InvestigationDiscoveryIE, + MotorTrendIE, + MotorTrendOnDemandIE, + ScienceChannelIE, + TLCIE, + TravelChannelIE, ) from .dreisat import DreiSatIE from .drbonanza import DRBonanzaIE From 8529f9fea64185e640025354732999da884c5524 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 21 May 2023 16:08:39 +0100 Subject: [PATCH 4/5] [DPlay] Consolidate D+ regions and topic-based channels --- youtube_dl/extractor/dplay.py | 118 +++++++++++++++++++++++++++++++--- 1 file changed, 110 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 16881ef2c..007a0cdcf 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -411,9 +411,15 @@ class HGTVDeIE(DPlayBaseIE): class DiscoveryPlusBaseIE(DPlayBaseIE): + _DISCO_CLIENT_VER = '27.42.0' def _update_disco_api_headers(self, headers, disco_base, display_id, realm): - headers['x-disco-client'] = 'WEB:UNKNOWN:dplus_us:15.0.0' + # https://github.com/yt-dlp/yt-dlp/issues/2138#issuecomment-1546623404 + headers.update({ + 'x-disco-params': 'realm=%s' % (realm, ), + 'x-disco-client': 'WEB:UNKNOWN:%s:%s' % (self._PRODUCT, self._DISCO_CLIENT_VER), + 'Authorization': self._get_auth(disco_base, display_id, realm), + }) def _download_video_playback_info(self, disco_base, video_id, headers): return self._download_json( @@ -425,14 +431,12 @@ class DiscoveryPlusBaseIE(DPlayBaseIE): 'videoId': video_id, 'wisteriaProperties': { 'platform': 'desktop', - 'product': 'dplus_us', + 'product': self._PRODUCT, }, }).encode('utf-8'))['data']['attributes']['streaming'] def _real_extract(self, url): - display_id = self._match_id(url) - return self._get_disco_api_info( - url, display_id, 'us1-prod-direct.discoveryplus.com', 'go', 'us') + return self._get_disco_api_info(url, self._match_id(url), **self._DISCO_API_PARAMS) class DiscoveryPlusIE(DiscoveryPlusBaseIE): @@ -453,15 +457,113 @@ class DiscoveryPlusIE(DiscoveryPlusBaseIE): 'season_number': 1, 'episode_number': 1, }, + # 'skip': 'not.found - video ... filtered by validator, reasonCode=15', + }, { + 'url': 'https://discoveryplus.com/ca/video/bering-sea-gold-discovery-ca/goldslingers', + 'only_matching': True, + }, { + 'url': 'https://www.discoveryplus.com/de/video/sport/eurosport-1-germany-eurosport-1-germany-sport/weltcup-jakarta-4663856', + 'info_dict': { + 'id': '965152', + 'ext': 'mp4', + 'display_id': 'eurosport-1-germany-eurosport-1-germany-sport/weltcup-jakarta-4663856', + }, + 'params': { + 'format': 'best/bestvideo', + 'skip_download': True, + }, + 'expected_warnings': [ + 'Unknown MIME type image/jpeg', + ], + 'skip': 'This video is only available for registered users', + }, { + 'url': 'https://www.discoveryplus.com/gb/video/richard-hammonds-workshop/season-1-episode-1', + 'info_dict': { + 'id': '965152', + 'ext': 'mp4', + 'display_id': 'richard-hammonds-workshop/season-1-episode-1', + }, + 'params': { + 'format': 'best/bestvideo', + 'skip_download': True, + }, + 'expected_warnings': [ + 'Unknown MIME type image/jpeg', + ], + 'skip': 'This video is only available for registered users', + }, { + 'url': 'https://www.discoveryplus.com/it/video/i-signori-della-neve/stagione-2-episodio-1-i-preparativi', + 'only_matching': True, + }, { + 'url': 'https://www.discoveryplus.com/it/video/super-benny/trailer', + 'info_dict': { + 'id': '2440530', + 'ext': 'mp4', + 'title': 'Trailer', + 'description': 'In arrivo a giugno - Scopri la sigla di Super Benny!', + 'timestamp': 1653030000, + 'upload_date': '20220520', + }, + 'params': { + 'format': 'best/bestvideo', + 'skip_download': True, + }, + 'expected_warnings': [ + 'Unknown MIME type image/jpeg', + ], + }, { + 'url': 'https://www.discoveryplus.com/no/video/estonia/forliset', + 'info_dict': { + 'id': '488965', + 'ext': 'mp4', + 'title': 'Forliset', + 'description': 'md5:4c6d28d266dbceb0e6ccc16bc9da22f4', + 'timestamp': 1601267400, + 'upload_date': '20200928', + }, 'params': { 'format': 'bestvideo', + 'skip_download': True, }, + 'expected_warnings': [ + 'Unknown MIME type ', + ], }] + _PRODUCT = 'dplus_us' + _DISCO_API_PARAMS = { + 'disco_host': 'us1-prod-direct.discoveryplus.com', + 'realm': 'go', + 'country': 'us', + } + _REGIONS = { + 'de': ('dplus_de', { + 'disco_host': 'eu1-prod-direct.discoveryplus.com', + 'realm': 'dplay', + 'country': 'de', + }), + 'gb': ('dplus_gb', { + 'disco_host': 'eu1-prod-direct.discoveryplus.com', + 'realm': 'dplay', + 'country': 'gb', + }), + 'it': ('dplus_us', { + 'disco_host': 'eu1-prod-direct.discoveryplus.com', + 'realm': 'dplay', + 'country': 'it', + }), + 'no': ('dplus_no', { + 'disco_host': 'eu1-prod-direct.discoveryplus.com', + 'realm': 'dplay', + 'country': 'no', + }), + } + def _real_extract(self, url): - display_id = self._match_id(url) - return self._get_disco_api_info( - url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de') + region = self._REGIONS.get(self._match_valid_url(url).group('region')) + if region: + self._PRODUCT, self._DISCO_API_PARAMS = region + return super(DiscoveryPlusIE, self)._real_extract(url) class GoDiscoveryIE(DiscoveryPlusBaseIE): From f55ab27160a3de31ec9c8a454d4596c002b17ea3 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 21 May 2023 16:12:07 +0100 Subject: [PATCH 5/5] [DPlay] Fix tests --- youtube_dl/extractor/dplay.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 007a0cdcf..aea238c7d 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -218,9 +218,10 @@ class DPlayIE(DPlayBaseIE): 'episode_number': 1, }, 'params': { - 'format': 'bestvideo', + 'format': 'best/bestvideo', 'skip_download': True, }, + 'skip': 'HTTP redirect to D+.com, "filtered by validator, reasonCode=14"', }, { # geo restricted, via secure api, unsigned download hls URL 'url': 'http://www.dplay.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster', @@ -239,9 +240,10 @@ class DPlayIE(DPlayBaseIE): 'episode_number': 1, }, 'params': { - 'format': 'bestvideo', + 'format': 'best/bestvideo', 'skip_download': True, }, + 'skip': 'HTTP redirect to D+.com, "filtered by validator, reasonCode=14"', }, { # disco-api 'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7', @@ -259,10 +261,10 @@ class DPlayIE(DPlayBaseIE): 'episode_number': 7, }, 'params': { - 'format': 'bestvideo', + 'format': 'best/bestvideo', 'skip_download': True, }, - 'skip': 'Available for Premium users', + 'skip': 'HTTP redirect to D+.com, "filtered by validator, reasonCode=14"', }, { 'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/', 'md5': '2b808ffb00fc47b884a172ca5d13053c', @@ -280,6 +282,7 @@ class DPlayIE(DPlayBaseIE): 'episode': 'Episode 1', 'episode_number': 1, }, + 'skip': 'Only available in Italy/Europe', }, { 'url': 'https://es.dplay.com/dmax/la-fiebre-del-oro/temporada-8-episodio-1/', 'info_dict': { @@ -299,6 +302,7 @@ class DPlayIE(DPlayBaseIE): 'params': { 'skip_download': True, }, + 'skip': 'This video is only available for registered users', }, { 'url': 'https://www.dplay.fi/videot/shifting-gears-with-aaron-kaufman/episode-16', 'only_matching': True, @@ -401,7 +405,10 @@ class HGTVDeIE(DPlayBaseIE): 'season_number': 3, 'episode_number': 3, }, - 'skip': 'Available for Premium users', + 'params': { + 'format': 'best/bestvideo', + }, + 'skip': 'HTTP 404 Not Found', }] def _real_extract(self, url): @@ -579,6 +586,7 @@ class GoDiscoveryIE(DiscoveryPlusBaseIE): 'season_number': 9, 'episode_number': 1, }, + 'skip': 'not.found - video ... could not be found', }] _PRODUCT = 'dsc' @@ -602,6 +610,7 @@ class TravelChannelIE(DiscoveryPlusBaseIE): 'season_number': 24, 'episode_number': 1, }, + 'skip': 'not.found - video ... was filtered by validator, reasonCode=15', }] _PRODUCT = 'trav' @@ -625,6 +634,7 @@ class CookingChannelIE(DiscoveryPlusBaseIE): 'season_number': 9, 'episode_number': 1, }, + 'skip': 'not.found - video ... could not be found', }] _PRODUCT = 'cook' @@ -648,6 +658,7 @@ class HGTVUsaIE(DiscoveryPlusBaseIE): 'season_number': 1, 'episode_number': 1, }, + 'skip': 'not.found - video ... could not be found', }] _PRODUCT = 'hgtv' @@ -671,6 +682,7 @@ class FoodNetworkIE(DiscoveryPlusBaseIE): 'season_number': 10, 'episode_number': 1, }, + 'skip': 'not.found - video ... was filtered by validator, reasonCode=15', }] _PRODUCT = 'food' @@ -694,6 +706,7 @@ class DestinationAmericaIE(DiscoveryPlusBaseIE): 'season_number': 1, 'episode_number': 1, }, + 'skip': 'not.found - video ... was filtered by validator, reasonCode=15', }] _PRODUCT = 'dam' @@ -717,6 +730,7 @@ class InvestigationDiscoveryIE(DiscoveryPlusBaseIE): 'season_number': 1, 'episode_number': 1, }, + 'skip': 'not.found - video ... could not be found', }] _PRODUCT = 'ids' @@ -740,6 +754,7 @@ class AmHistoryChannelIE(DiscoveryPlusBaseIE): 'season_number': 1, 'episode_number': 1, }, + 'skip': 'not.found - video ... could not be found', }] _PRODUCT = 'ahc' @@ -763,6 +778,7 @@ class ScienceChannelIE(DiscoveryPlusBaseIE): 'season_number': 1, 'episode_number': 1, }, + 'skip': 'not.found - video ... could not be found', }] _PRODUCT = 'sci' @@ -786,6 +802,7 @@ class DIYNetworkIE(DiscoveryPlusBaseIE): 'season_number': 10, 'episode_number': 2, }, + 'skip': 'not.found - video ... was filtered by validator, reasonCode=15', }] _PRODUCT = 'diy' @@ -809,6 +826,7 @@ class DiscoveryLifeIE(DiscoveryPlusBaseIE): 'season_number': 1, 'episode_number': 2, }, + 'skip': 'not.found - video ... could not be found', }] _PRODUCT = 'dlf' @@ -832,6 +850,7 @@ class AnimalPlanetIE(DiscoveryPlusBaseIE): 'season_number': 16, 'episode_number': 11, }, + 'skip': 'not.found - video ... was filtered by validator, reasonCode=15', }] _PRODUCT = 'apl' @@ -855,6 +874,7 @@ class TLCIE(DiscoveryPlusBaseIE): 'season_number': 1, 'episode_number': 1, }, + 'skip': 'not.found - video ... could not be found', }] _PRODUCT = 'tlc' @@ -878,6 +898,7 @@ class MotorTrendIE(DiscoveryPlusBaseIE): 'season_number': 2, 'episode_number': 3, }, + 'skip': 'not.found - video ... was filtered by validator, reasonCode=15', }] _PRODUCT = 'vel' @@ -910,6 +931,7 @@ class MotorTrendOnDemandIE(DiscoveryPlusBaseIE): 'upload_date': '20140101', 'tags': [], }, + 'skip': 'This video is only available for registered users.', }] _PRODUCT = 'MTOD' @@ -949,6 +971,7 @@ class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE): 'expected_warnings': [ 'Unknown MIME type image/jpeg', ], + 'skip': 'not.found - video ... was filtered by validator, reasonCode=9 (but available in UK, India?, ...)?', }] _PRODUCT = 'dplus-india'