Compare commits

34 Commits: 8408ad30f7 ... 1f538defab

Author | SHA1 | Date |
---|---|---|
Andrei Lebedev | 1f538defab | |
dirkf | 668332b973 | |
dirkf | 0b2ce3685e | |
dirkf | c2766cb80e | |
dirkf | eb38665438 | |
dirkf | e0727e4ab6 | |
Ori Avtalion | 4ea59c6107 | |
dirkf | 21792b88b7 | |
dirkf | d8f134a664 | |
dirkf | 31a15a7c8d | |
dirkf | 19dc10b986 | |
dirkf | 182f63e82a | |
gy-chen | 71211e7db7 | |
Zizheng Guo | a96a45b2cd | |
hatsomatt | 820fae3b3a | |
dirkf | aef24d97e9 | |
dirkf | f7b30e3f73 | |
dirkf | f66372403f | |
dirkf | 7216fa2ac4 | |
dirkf | acc383b9e3 | |
Hubert Hirtz | f0812d7848 | |
Aaron Tan | 40bd5c1815 | |
dirkf | 70f230f9cf | |
dirkf | 48ddab1f3a | |
dirkf | 7687389f08 | |
dirkf | b83889a20e | |
dirkf | 6b65df9cad | |
dirkf | 23fe05b4f7 | |
dirkf | 8479922ba1 | |
dirkf | a61abdaa68 | |
dirkf | 2a47a5a3f9 | |
dirkf | 8f0e4816e3 | |
dirkf | 6468249594 | |
Andrei Lebedev | a98ff43ac2 | |
@@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 from test.helper import (
     assertGreaterEqual,
+    assertLessEqual,
     expect_warnings,
     get_params,
     gettestcases,

@@ -122,7 +123,10 @@ def generator(test_case, tname):
     params['outtmpl'] = tname + '_' + params['outtmpl']
     if is_playlist and 'playlist' not in test_case:
         params.setdefault('extract_flat', 'in_playlist')
-        params.setdefault('playlistend', test_case.get('playlist_mincount'))
+        params.setdefault('playlistend',
+                          test_case['playlist_maxcount'] + 1
+                          if test_case.get('playlist_maxcount')
+                          else test_case.get('playlist_mincount'))
         params.setdefault('skip_download', True)

     ydl = YoutubeDL(params, auto_init=False)

@@ -190,6 +194,14 @@ def generator(test_case, tname):
                     'Expected at least %d in playlist %s, but got only %d' % (
                         test_case['playlist_mincount'], test_case['url'],
                         len(res_dict['entries'])))
+            if 'playlist_maxcount' in test_case:
+                assertLessEqual(
+                    self,
+                    len(res_dict['entries']),
+                    test_case['playlist_maxcount'],
+                    'Expected at most %d in playlist %s, but got %d' % (
+                        test_case['playlist_maxcount'], test_case['url'],
+                        len(res_dict['entries'])))
             if 'playlist_count' in test_case:
                 self.assertEqual(
                     len(res_dict['entries']),
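
The hunks above extend the download-test harness: a new `playlist_maxcount` test-case field bounds the number of playlist entries from above, mirroring the existing `playlist_mincount`, and `playlistend` is now derived from whichever bound is available. A minimal sketch of a test case exercising both bounds (URL and counts invented for illustration):

```python
# Hypothetical test case: with both fields set, the harness asserts
#     playlist_mincount <= len(entries) <= playlist_maxcount
# and seeds params['playlistend'] = playlist_maxcount + 1, so just
# enough entries are fetched to check the upper bound.
_TEST = {
    'url': 'https://example.com/playlist/123',
    'info_dict': {
        'id': '123',
    },
    'playlist_mincount': 5,
    'playlist_maxcount': 50,
}
```
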
@@ -18,6 +18,7 @@ from test.helper import (
 )
 from youtube_dl import YoutubeDL
 from youtube_dl.compat import (
+    compat_contextlib_suppress,
     compat_http_cookiejar_Cookie,
     compat_http_server,
     compat_kwargs,

@@ -35,6 +36,9 @@ from youtube_dl.downloader.external import (
     HttpieFD,
     WgetFD,
 )
+from youtube_dl.postprocessor import (
+    FFmpegPostProcessor,
+)
 import threading

 TEST_SIZE = 10 * 1024

@@ -227,7 +231,17 @@ class TestAria2cFD(unittest.TestCase):
         self.assertIn('--load-cookies=%s' % downloader._cookies_tempfile, cmd)


-@ifExternalFDAvailable(FFmpegFD)
+# Handle delegated availability
+def ifFFmpegFDAvailable(externalFD):
+    # raise SkipTest, or set False!
+    avail = ifExternalFDAvailable(externalFD) and False
+    with compat_contextlib_suppress(Exception):
+        avail = FFmpegPostProcessor(downloader=None).available
+    return unittest.skipUnless(
+        avail, externalFD.get_basename() + ' not found')
+
+
+@ifFFmpegFDAvailable(FFmpegFD)
 class TestFFmpegFD(unittest.TestCase):
     _args = []

@@ -81,6 +81,7 @@ from youtube_dl.utils import (
     sanitize_filename,
     sanitize_path,
     sanitize_url,
+    sanitized_Request,
     shell_quote,
     smuggle_url,
     str_or_none,

@@ -255,6 +256,18 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
         self.assertEqual(sanitize_url('foo bar'), 'foo bar')

+    def test_sanitized_Request(self):
+        self.assertFalse(sanitized_Request('http://foo.bar').has_header('Authorization'))
+        self.assertFalse(sanitized_Request('http://:foo.bar').has_header('Authorization'))
+        self.assertEqual(sanitized_Request('http://@foo.bar').get_header('Authorization'),
+                         'Basic Og==')
+        self.assertEqual(sanitized_Request('http://:pass@foo.bar').get_header('Authorization'),
+                         'Basic OnBhc3M=')
+        self.assertEqual(sanitized_Request('http://user:@foo.bar').get_header('Authorization'),
+                         'Basic dXNlcjo=')
+        self.assertEqual(sanitized_Request('http://user:pass@foo.bar').get_header('Authorization'),
+                         'Basic dXNlcjpwYXNz')
+
     def test_expand_path(self):
         def env(var):
             return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)

@@ -512,11 +525,14 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(float_or_none(set()), None)

     def test_int_or_none(self):
+        self.assertEqual(int_or_none(42), 42)
         self.assertEqual(int_or_none('42'), 42)
         self.assertEqual(int_or_none(''), None)
         self.assertEqual(int_or_none(None), None)
         self.assertEqual(int_or_none([]), None)
         self.assertEqual(int_or_none(set()), None)
+        self.assertEqual(int_or_none('42', base=8), 34)
+        self.assertRaises(TypeError, int_or_none(42, base=8))

     def test_str_to_int(self):
         self.assertEqual(str_to_int('123,456'), 123456)
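
The expected `Authorization` values in the new `test_sanitized_Request` cases are just `'Basic '` plus the Base64 of the `user:password` part of the URL; the test URLs exercise the empty-user, empty-password, and empty-both corner cases. A quick stdlib check of where those strings come from:

```python
import base64

# Reproduce the expected header values from the tests above.
for userinfo in (':', ':pass', 'user:', 'user:pass'):
    print('%-10s -> Basic %s' % (
        userinfo, base64.b64encode(userinfo.encode('utf-8')).decode('ascii')))
# :          -> Basic Og==
# :pass      -> Basic OnBhc3M=
# user:      -> Basic dXNlcjo=
# user:pass  -> Basic dXNlcjpwYXNz
```
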
@@ -2421,29 +2421,26 @@ except ImportError:  # Python 2
    compat_urllib_request_urlretrieve = compat_urlretrieve

 try:
+    from HTMLParser import (
+        HTMLParser as compat_HTMLParser,
+        HTMLParseError as compat_HTMLParseError)
+except ImportError:  # Python 3
     from html.parser import HTMLParser as compat_HTMLParser
-except ImportError:  # Python 2
-    from HTMLParser import HTMLParser as compat_HTMLParser
-compat_html_parser_HTMLParser = compat_HTMLParser
-
-try:  # Python 2
-    from HTMLParser import HTMLParseError as compat_HTMLParseError
-except ImportError:  # Python <3.4
     try:
         from html.parser import HTMLParseError as compat_HTMLParseError
     except ImportError:  # Python >3.4
-        # HTMLParseError has been deprecated in Python 3.3 and removed in
+        # HTMLParseError was deprecated in Python 3.3 and removed in
         # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
         # and uniform cross-version exception handling
         class compat_HTMLParseError(Exception):
             pass
+compat_html_parser_HTMLParser = compat_HTMLParser
 compat_html_parser_HTMLParseError = compat_HTMLParseError

 try:
-    from subprocess import DEVNULL
-    compat_subprocess_get_DEVNULL = lambda: DEVNULL
-except ImportError:
+    _DEVNULL = subprocess.DEVNULL
+    compat_subprocess_get_DEVNULL = lambda: _DEVNULL
+except AttributeError:
     compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')

 try:

@@ -2943,6 +2940,51 @@ else:
     compat_socket_create_connection = socket.create_connection


+try:
+    from contextlib import suppress as compat_contextlib_suppress
+except ImportError:
+    class compat_contextlib_suppress(object):
+        _exceptions = None
+
+        def __init__(self, *exceptions):
+            super(compat_contextlib_suppress, self).__init__()
+            # TODO: [Base]ExceptionGroup (3.12+)
+            self._exceptions = exceptions
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, exc_type, exc_val, exc_tb):
+            return exc_val is not None and isinstance(exc_val, self._exceptions or tuple())
+
+
+# subprocess.Popen context manager
+# avoids leaking handles if .communicate() is not called
+try:
+    _Popen = subprocess.Popen
+    # check for required context manager attributes
+    _Popen.__enter__ and _Popen.__exit__
+    compat_subprocess_Popen = _Popen
+except AttributeError:
+    # not a context manager - make one
+    from contextlib import contextmanager
+
+    @contextmanager
+    def compat_subprocess_Popen(*args, **kwargs):
+        popen = None
+        try:
+            popen = _Popen(*args, **kwargs)
+            yield popen
+        finally:
+            if popen:
+                for f in (popen.stdin, popen.stdout, popen.stderr):
+                    if f:
+                        # repeated .close() is OK, but just in case
+                        with compat_contextlib_suppress(EnvironmentError):
+                            f.close()
+                popen.wait()
+
+
 # Fix https://github.com/ytdl-org/youtube-dl/issues/4223
 # See http://bugs.python.org/issue9161 for what is broken
 def workaround_optparse_bug9161():

@@ -3263,6 +3305,7 @@ __all__ = [
     'compat_http_cookiejar_Cookie',
     'compat_http_cookies',
     'compat_http_cookies_SimpleCookie',
+    'compat_contextlib_suppress',
     'compat_ctypes_WINFUNCTYPE',
     'compat_etree_fromstring',
     'compat_filter',

@@ -3298,6 +3341,7 @@ __all__ = [
     'compat_struct_pack',
     'compat_struct_unpack',
     'compat_subprocess_get_DEVNULL',
+    'compat_subprocess_Popen',
     'compat_tokenize_tokenize',
     'compat_urllib_error',
     'compat_urllib_parse',
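
The two helpers added to the compat module above mirror later-Python behaviour on older interpreters: `compat_contextlib_suppress` stands in for `contextlib.suppress` (Python 3.4+), and `compat_subprocess_Popen` supplies the context-manager protocol that `subprocess.Popen` gained in Python 3.2, closing pipes and reaping the child on exit. A minimal usage sketch (path and command are illustrative):

```python
import subprocess

from youtube_dl.compat import (
    compat_contextlib_suppress,
    compat_subprocess_Popen,
)

# Swallow only the listed exceptions, like contextlib.suppress.
with compat_contextlib_suppress(IOError, OSError):
    open('/no/such/file')

# Pipes are closed and the process waited on when the block exits,
# even if communicate() is never called.
with compat_subprocess_Popen(['echo', 'ok'], stdout=subprocess.PIPE) as proc:
    print(proc.stdout.read())
```
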
@@ -11,8 +11,14 @@ from .common import FileDownloader
 from ..compat import (
     compat_setenv,
     compat_str,
+    compat_subprocess_Popen,
 )
-from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
+
+try:
+    from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
+except ImportError:
+    FFmpegPostProcessor = None
+
 from ..utils import (
     cli_option,
     cli_valueless_option,

@@ -206,7 +212,10 @@ class WgetFD(ExternalFD):
             retry[1] = '0'
         cmd += retry
         cmd += self._option('--bind-address', 'source_address')
-        cmd += self._option('--proxy', 'proxy')
+        proxy = self.params.get('proxy')
+        if proxy:
+            for var in ('http_proxy', 'https_proxy'):
+                cmd += ['--execute', '%s=%s' % (var, proxy)]
         cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
         cmd += self._configuration_args()
         cmd += ['--', info_dict['url']]

@@ -358,13 +367,14 @@ class FFmpegFD(ExternalFD):

     @classmethod
     def available(cls):
-        return FFmpegPostProcessor().available
+        # actual availability can only be confirmed for an instance
+        return bool(FFmpegPostProcessor)

     def _call_downloader(self, tmpfilename, info_dict):
-        url = info_dict['url']
-        ffpp = FFmpegPostProcessor(downloader=self)
+        # `downloader` means the parent `YoutubeDL`
+        ffpp = FFmpegPostProcessor(downloader=self.ydl)
         if not ffpp.available:
-            self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
+            self.report_error('ffmpeg required for download but no ffmpeg (nor avconv) executable could be found. Please install one.')
             return False
         ffpp.check_version()

@@ -393,6 +403,7 @@ class FFmpegFD(ExternalFD):
         # if end_time:
         #     args += ['-t', compat_str(end_time - start_time)]

+        url = info_dict['url']
         cookies = self.ydl.cookiejar.get_cookies_for_url(url)
         if cookies:
             args.extend(['-cookies', ''.join(

@@ -480,21 +491,25 @@ class FFmpegFD(ExternalFD):

         self._debug_cmd(args)

-        proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
-        try:
-            retval = proc.wait()
-        except BaseException as e:
-            # subprocess.run would send the SIGKILL signal to ffmpeg and the
-            # mp4 file couldn't be played, but if we ask ffmpeg to quit it
-            # produces a file that is playable (this is mostly useful for live
-            # streams). Note that Windows is not affected and produces playable
-            # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
-            if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32':
-                process_communicate_or_kill(proc, b'q')
-            else:
-                proc.kill()
-                proc.wait()
-            raise
+        # From [1], a PIPE opened in Popen() should be closed, unless
+        # .communicate() is called. Avoid leaking any PIPEs by using Popen
+        # as a context manager (newer Python 3.x and compat)
+        # Fixes "Resource Warning" in test/test_downloader_external.py
+        # [1] https://devpress.csdn.net/python/62fde12d7e66823466192e48.html
+        with compat_subprocess_Popen(args, stdin=subprocess.PIPE, env=env) as proc:
+            try:
+                retval = proc.wait()
+            except BaseException as e:
+                # subprocess.run would send the SIGKILL signal to ffmpeg and the
+                # mp4 file couldn't be played, but if we ask ffmpeg to quit it
+                # produces a file that is playable (this is mostly useful for live
+                # streams). Note that Windows is not affected and produces playable
+                # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
+                if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32':
+                    process_communicate_or_kill(proc, b'q')
+                else:
+                    proc.kill()
+                    proc.wait()
+                raise
         return retval

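
Two behavioural notes on the downloader hunks above: `WgetFD` stops passing the proxy URL through a `--proxy` option (wget does not accept a URL argument there) and instead sets the `http_proxy`/`https_proxy` wgetrc settings via wget's `--execute` flag, and `FFmpegFD` now runs ffmpeg under `compat_subprocess_Popen` so its stdin PIPE cannot leak. Roughly, the proxy part of the wget command now expands like this (proxy URL invented):

```python
# Sketch of the option expansion performed by the new WgetFD code.
cmd = ['wget', '-O', 'out.part']
proxy = 'http://127.0.0.1:3128'
if proxy:
    for var in ('http_proxy', 'https_proxy'):
        cmd += ['--execute', '%s=%s' % (var, proxy)]
# cmd -> [..., '--execute', 'http_proxy=http://127.0.0.1:3128',
#              '--execute', 'https_proxy=http://127.0.0.1:3128']
```
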
@@ -0,0 +1,79 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    merge_dicts,
+    parse_iso8601,
+    T,
+    traverse_obj,
+    txt_or_none,
+    urljoin,
+)
+
+
+class CaffeineTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/]+/video/(?P<id>[0-9a-f-]+)'
+    _TESTS = [{
+        'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e',
+        'info_dict': {
+            'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e',
+            'ext': 'mp4',
+            'title': 'GOOOOD MORNINNNNN #highlights',
+            'timestamp': 1654702180,
+            'upload_date': '20220608',
+            'uploader': 'TsuSurf',
+            'duration': 3145,
+            'age_limit': 17,
+        },
+        'params': {
+            'format': 'bestvideo',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        json_data = self._download_json(
+            'https://api.caffeine.tv/social/public/activity/' + video_id,
+            video_id)
+        broadcast_info = traverse_obj(json_data, ('broadcast_info', T(dict))) or {}
+        title = broadcast_info['broadcast_title']
+        video_url = broadcast_info['video_url']
+
+        ext = determine_ext(video_url)
+        if ext == 'm3u8':
+            formats = self._extract_m3u8_formats(
+                video_url, video_id, 'mp4', entry_protocol='m3u8',
+                fatal=False)
+        else:
+            formats = [{'url': video_url}]
+        self._sort_formats(formats)
+
+        return merge_dicts({
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+        }, traverse_obj(json_data, {
+            'uploader': ((None, 'user'), 'username'),
+        }, get_all=False), traverse_obj(json_data, {
+            'like_count': ('like_count', T(int_or_none)),
+            'view_count': ('view_count', T(int_or_none)),
+            'comment_count': ('comment_count', T(int_or_none)),
+            'tags': ('tags', Ellipsis, T(txt_or_none)),
+            'is_live': 'is_live',
+            'uploader': ('user', 'name'),
+        }), traverse_obj(broadcast_info, {
+            'duration': ('content_duration', T(int_or_none)),
+            'timestamp': ('broadcast_start_time', T(parse_iso8601)),
+            'thumbnail': ('preview_image_path', T(lambda u: urljoin(url, u))),
+            'age_limit': ('content_rating', T(lambda r: r and {
+                # assume Apple Store ratings [1]
+                # 1. https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
+                'FOUR_PLUS': 0,
+                'NINE_PLUS': 9,
+                'TWELVE_PLUS': 12,
+                'SEVENTEEN_PLUS': 17,
+            }.get(r, 17))),
+        }))
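
The new extractor is written in the `traverse_obj`/`T` style (back-ported from yt-dlp) rather than with chained `.get()` calls: a path tuple walks nested dicts/lists, and `T(f)` marks a transform whose `None` results are dropped instead of raising. A rough sketch of the pattern with made-up data:

```python
from youtube_dl.utils import int_or_none, T, traverse_obj

data = {'user': {'username': 'someuser'}, 'view_count': '42'}

traverse_obj(data, ('user', 'username'))            # -> 'someuser'
traverse_obj(data, ('view_count', T(int_or_none)))  # -> 42
traverse_obj(data, ('missing', 'key'))              # -> None, no KeyError
```
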
@@ -25,6 +25,7 @@ from ..compat import (
     compat_getpass,
     compat_integer_types,
     compat_http_client,
+    compat_kwargs,
     compat_map as map,
     compat_open as open,
     compat_os_name,

@@ -1102,6 +1103,60 @@ class InfoExtractor(object):
             self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
             return None

+    def _search_json(self, start_pattern, string, name, video_id, **kwargs):
+        """Searches string for the JSON object specified by start_pattern"""
+
+        # self, start_pattern, string, name, video_id, *, end_pattern='',
+        # contains_pattern=r'{(?s:.+)}', fatal=True, default=NO_DEFAULT
+        # NB: end_pattern is only used to reduce the size of the initial match
+        end_pattern = kwargs.pop('end_pattern', '')
+        # (?:[\s\S]) simulates (?(s):.) (eg)
+        contains_pattern = kwargs.pop('contains_pattern', r'{[\s\S]+}')
+        fatal = kwargs.pop('fatal', True)
+        default = kwargs.pop('default', NO_DEFAULT)
+
+        if default is NO_DEFAULT:
+            default, has_default = {}, False
+        else:
+            fatal, has_default = False, True
+
+        json_string = self._search_regex(
+            r'(?:{0})\s*(?P<json>{1})\s*(?:{2})'.format(
+                start_pattern, contains_pattern, end_pattern),
+            string, name, group='json', fatal=fatal, default=None if has_default else NO_DEFAULT)
+        if not json_string:
+            return default
+
+        # yt-dlp has a special JSON parser that allows trailing text.
+        # Until that arrives here, the diagnostic from the exception
+        # raised by json.loads() is used to extract the wanted text.
+        # Either way, it's a problem if a transform_source() can't
+        # handle the trailing text.
+
+        # force an exception
+        kwargs['fatal'] = True
+
+        # self._downloader._format_err(name, self._downloader.Styles.EMPHASIS)
+        for _ in range(2):
+            try:
+                # return self._parse_json(json_string, video_id, ignore_extra=True, **kwargs)
+                transform_source = kwargs.pop('transform_source', None)
+                if transform_source:
+                    json_string = transform_source(json_string)
+                return self._parse_json(json_string, video_id, **compat_kwargs(kwargs))
+            except ExtractorError as e:
+                end = int_or_none(self._search_regex(r'\(char\s+(\d+)', error_to_compat_str(e), 'end', default=None))
+                if end is not None:
+                    json_string = json_string[:end]
+                    continue
+                msg = 'Unable to extract {0} - Failed to parse JSON'.format(name)
+                if fatal:
+                    raise ExtractorError(msg, cause=e.cause, video_id=video_id)
+                elif not has_default:
+                    self.report_warning(
+                        '{0}: {1}'.format(msg, error_to_compat_str(e)), video_id=video_id)
+                return default
+
     def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
         """
         Like _search_regex, but strips HTML tags and unescapes entities.

@@ -2966,25 +3021,22 @@ class InfoExtractor(object):
         return formats

     def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
-        mobj = re.search(
-            r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
-            webpage)
-        if mobj:
-            try:
-                jwplayer_data = self._parse_json(mobj.group('options'),
-                                                 video_id=video_id,
-                                                 transform_source=transform_source)
-            except ExtractorError:
-                pass
-            else:
-                if isinstance(jwplayer_data, dict):
-                    return jwplayer_data
+        return self._search_json(
+            r'''(?<!-)\bjwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?:(?!</script>).)*?\.\s*(?:setup\s*\(|(?P<load>load)\s*\(\s*\[)''',
+            webpage, 'JWPlayer data', video_id,
+            # must be a {...} or sequence, ending
+            contains_pattern=r'\{[\s\S]*}(?(load)(?:\s*,\s*\{[\s\S]*})*)', end_pattern=r'(?(load)\]|\))',
+            transform_source=transform_source, default=None)

     def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
-        jwplayer_data = self._find_jwplayer_data(
-            webpage, video_id, transform_source=js_to_json)
-        return self._parse_jwplayer_data(
-            jwplayer_data, video_id, *args, **kwargs)
+        # allow passing `transform_source` through to _find_jwplayer_data()
+        transform_source = kwargs.pop('transform_source', None)
+        kwfind = compat_kwargs({'transform_source': transform_source}) if transform_source else {}
+
+        jwplayer_data = self._find_jwplayer_data(webpage, video_id, **kwfind)
+
+        return self._parse_jwplayer_data(jwplayer_data, video_id, *args, **kwargs)

     def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
                              m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):

@@ -3018,22 +3070,14 @@ class InfoExtractor(object):
                     mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)

             subtitles = {}
-            tracks = video_data.get('tracks')
-            if tracks and isinstance(tracks, list):
-                for track in tracks:
-                    if not isinstance(track, dict):
-                        continue
-                    track_kind = track.get('kind')
-                    if not track_kind or not isinstance(track_kind, compat_str):
-                        continue
-                    if track_kind.lower() not in ('captions', 'subtitles'):
-                        continue
-                    track_url = urljoin(base_url, track.get('file'))
-                    if not track_url:
-                        continue
-                    subtitles.setdefault(track.get('label') or 'en', []).append({
-                        'url': self._proto_relative_url(track_url)
-                    })
+            for track in traverse_obj(video_data, (
+                    'tracks', lambda _, t: t.get('kind').lower() in ('captions', 'subtitles'))):
+                track_url = urljoin(base_url, track.get('file'))
+                if not track_url:
+                    continue
+                subtitles.setdefault(track.get('label') or 'en', []).append({
+                    'url': self._proto_relative_url(track_url)
+                })

             entry = {
                 'id': this_video_id,

@@ -3272,7 +3316,11 @@ class InfoExtractor(object):
         if (self._downloader.params.get('mark_watched', False)
                 and (self._get_login_info()[0] is not None
                      or self._downloader.params.get('cookiefile') is not None)):
-            self._mark_watched(*args, **kwargs)
+            # extractors apart from YoutubeIE can mark: be more lenient
+            try:
+                self._mark_watched(*args, **kwargs)
+            except NotImplementedError:
+                self.report_warning('Marking as watched is not supported')

     def _mark_watched(self, *args, **kwargs):
         raise NotImplementedError('This method must be implemented by subclasses')
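
`_search_json` above packages the common `_search_regex` + `_parse_json` dance: the caller supplies only the pattern introducing the JSON object, and trailing non-JSON text is trimmed by retrying up to the character position reported in the `json.loads()` error. A hypothetical call from inside an extractor (page content invented):

```python
# Suppose webpage contains:
#   <script>window.__DATA__ = {"id": "x1", "title": "Some title"};</script>
data = self._search_json(
    r'window\.__DATA__\s*=', webpage, 'initial data', video_id,
    end_pattern=r';\s*</script>', default={})
title = data.get('title')
```
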
@@ -159,6 +159,7 @@ from .businessinsider import BusinessInsiderIE
 from .buzzfeed import BuzzFeedIE
 from .byutv import BYUtvIE
 from .c56 import C56IE
+from .caffeine import CaffeineTVIE
 from .callin import CallinIE
 from .camdemy import (
     CamdemyIE,

@@ -382,7 +383,6 @@ from .fc2 import (
     FC2EmbedIE,
 )
 from .fczenit import FczenitIE
-from .filemoon import FileMoonIE
 from .fifa import FifaIE
 from .filmon import (
     FilmOnIE,

@@ -443,6 +443,7 @@ from .gamespot import GameSpotIE
 from .gamestar import GameStarIE
 from .gaskrank import GaskrankIE
 from .gazeta import GazetaIE
+from .gbnews import GBNewsIE
 from .gdcvault import GDCVaultIE
 from .gedidigital import GediDigitalIE
 from .generic import GenericIE

@@ -924,6 +925,11 @@ from .palcomp3 import (
     PalcoMP3VideoIE,
 )
 from .pandoratv import PandoraTVIE
+from .panopto import (
+    PanoptoIE,
+    PanoptoListIE,
+    PanoptoPlaylistIE,
+)
 from .parliamentliveuk import ParliamentLiveUKIE
 from .patreon import PatreonIE
 from .pbs import PBSIE

@@ -1652,7 +1658,15 @@ from .younow import (
     YouNowChannelIE,
     YouNowMomentIE,
 )
-from .youporn import YouPornIE
+from .youporn import (
+    YouPornIE,
+    YouPornCategoryIE,
+    YouPornChannelIE,
+    YouPornCollectionIE,
+    YouPornStarIE,
+    YouPornTagIE,
+    YouPornVideosIE,
+)
 from .yourporn import YourPornIE
 from .yourupload import YourUploadIE
 from .youtube import (
@@ -1,43 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    decode_packed_codes,
-    js_to_json,
-)
-
-
-class FileMoonIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?filemoon\.sx/./(?P<id>\w+)'
-    _TEST = {
-        'url': 'https://filemoon.sx/e/dw40rxrzruqz',
-        'md5': '5a713742f57ac4aef29b74733e8dda01',
-        'info_dict': {
-            'id': 'dw40rxrzruqz',
-            'title': 'dw40rxrzruqz',
-            'ext': 'mp4'
-        }
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-        matches = re.findall(r'(?s)(eval.*?)</script>', webpage)
-        packed = matches[-1]
-        unpacked = decode_packed_codes(packed)
-        jwplayer_sources = self._parse_json(
-            self._search_regex(
-                r'(?s)player\s*\.\s*setup\s*\(\s*\{\s*sources\s*:\s*(.*?])', unpacked, 'jwplayer sources'),
-            video_id, transform_source=js_to_json)
-
-        formats = self._parse_jwplayer_formats(jwplayer_sources, video_id)
-
-        return {
-            'id': video_id,
-            'title': self._generic_title(url) or video_id,
-            'formats': formats
-        }
@@ -0,0 +1,139 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    extract_attributes,
+    ExtractorError,
+    T,
+    traverse_obj,
+    txt_or_none,
+    url_or_none,
+)
+
+
+class GBNewsIE(InfoExtractor):
+    IE_DESC = 'GB News clips, features and live stream'
+
+    # \w+ is normally shows or news, but apparently any word redirects to the correct URL
+    _VALID_URL = r'https?://(?:www\.)?gbnews\.(?:uk|com)/(?:\w+/)?(?P<id>[^#?]+)'
+
+    _PLATFORM = 'safari'
+    _SSMP_URL = 'https://mm-v2.simplestream.com/ssmp/api.php'
+    _TESTS = [{
+        'url': 'https://www.gbnews.uk/shows/andrew-neils-message-to-companies-choosing-to-boycott-gb-news/106889',
+        'info_dict': {
+            'id': '106889',
+            'ext': 'mp4',
+            'title': "Andrew Neil's message to companies choosing to boycott GB News",
+            'description': 'md5:b281f5d22fd6d5eda64a4e3ba771b351',
+        },
+        'skip': '404 not found',
+    }, {
+        'url': 'https://www.gbnews.com/news/bbc-claudine-gay-harvard-university-antisemitism-row',
+        'info_dict': {
+            'id': '52264136',
+            'display_id': 'bbc-claudine-gay-harvard-university-antisemitism-row',
+            'ext': 'mp4',
+            'title': 'BBC deletes post after furious backlash over headline downplaying antisemitism',
+            'description': 'The post was criticised by former employers of the broadcaster',
+        },
+    }, {
+        'url': 'https://www.gbnews.uk/watchlive',
+        'info_dict': {
+            'id': '1069',
+            'display_id': 'watchlive',
+            'ext': 'mp4',
+            'title': 'GB News Live',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': 'm3u8',
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url).split('/')[-1]
+
+        webpage = self._download_webpage(url, display_id)
+        # extraction based on https://github.com/ytdl-org/youtube-dl/issues/29341
+        '''
+        <div id="video-106908"
+             class="simplestream"
+             data-id="GB001"
+             data-type="vod"
+             data-key="3Li3Nt2Qs8Ct3Xq9Fi5Uy0Mb2Bj0Qs"
+             data-token="f9c317c727dc07f515b20036c8ef14a6"
+             data-expiry="1624300052"
+             data-uvid="37900558"
+             data-poster="https://thumbnails.simplestreamcdn.com/gbnews/ondemand/37900558.jpg?width=700&"
+             data-npaw="false"
+             data-env="production">
+        '''
+        # exception if no match
+        video_data = self._search_regex(
+            r'(<div\s[^>]*\bclass\s*=\s*(\'|")(?!.*sidebar\b)simplestream(?:\s[\s\w$-]*)?\2[^>]*>)',
+            webpage, 'video data')
+
+        video_data = extract_attributes(video_data)
+        ss_id = video_data.get('data-id')
+        if not ss_id:
+            raise ExtractorError('Simplestream ID not found')
+
+        json_data = self._download_json(
+            self._SSMP_URL, display_id,
+            note='Downloading Simplestream JSON metadata',
+            errnote='Unable to download Simplestream JSON metadata',
+            query={
+                'id': ss_id,
+                'env': video_data.get('data-env', 'production'),
+            }, fatal=False)
+
+        meta_url = traverse_obj(json_data, ('response', 'api_hostname'))
+        if not meta_url:
+            raise ExtractorError('No API host found')
+
+        uvid = video_data['data-uvid']
+        dtype = video_data.get('data-type')
+        stream_data = self._download_json(
+            '%s/api/%s/stream/%s' % (meta_url, 'show' if dtype == 'vod' else dtype, uvid),
+            uvid,
+            query={
+                'key': video_data.get('data-key'),
+                'platform': self._PLATFORM,
+            },
+            headers={
+                'Token': video_data.get('data-token'),
+                'Token-Expiry': video_data.get('data-expiry'),
+                'Uvid': uvid,
+            }, fatal=False)
+
+        stream_url = traverse_obj(stream_data, (
+            'response', 'stream', T(url_or_none)))
+        if not stream_url:
+            raise ExtractorError('No stream data/URL')
+
+        # now known to be a dict
+        stream_data = stream_data['response']
+        drm = stream_data.get('drm')
+        if drm:
+            self.report_drm(uvid)
+
+        formats = self._extract_m3u8_formats(
+            stream_url, uvid, ext='mp4', entry_protocol='m3u8_native',
+            fatal=False)
+        # exception if no formats
+        self._sort_formats(formats)
+
+        return {
+            'id': uvid,
+            'display_id': display_id,
+            'title': (traverse_obj(stream_data, ('title', T(txt_or_none)))
+                      or self._og_search_title(webpage, default=None)
+                      or display_id.replace('-', ' ').capitalize()),
+            'description': self._og_search_description(webpage, default=None),
+            'thumbnail': (traverse_obj(video_data, ('data-poster', T(url_or_none)))
+                          or self._og_search_thumbnail(webpage)),
+            'formats': formats,
+            'is_live': (dtype == 'live') or None,
+        }
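
The GB News extraction pivots on `extract_attributes()`, which turns the `simplestream` `<div>` documented in the code into a plain dict of `data-*` values that then drive the two Simplestream API calls. A cut-down sketch using the sample element quoted above:

```python
from youtube_dl.utils import extract_attributes

div = ('<div id="video-106908" class="simplestream" data-id="GB001" '
       'data-type="vod" data-uvid="37900558" data-env="production">')
attrs = extract_attributes(div)
attrs['data-uvid']      # -> '37900558'
attrs.get('data-type')  # -> 'vod'
```
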
@@ -132,6 +132,7 @@ from .kinja import KinjaEmbedIE
 from .arcpublishing import ArcPublishingIE
 from .medialaan import MedialaanIE
 from .simplecast import SimplecastIE
+from .panopto import PanoptoIE


 class GenericIE(InfoExtractor):

@@ -2340,6 +2341,15 @@ class GenericIE(InfoExtractor):
             },
             'expected_warnings': ['uploader id'],
         },
+        {
+            # Panopto embeds
+            'url': 'https://www.monash.edu/learning-teaching/teachhq/learning-technologies/panopto/how-to/insert-a-quiz-into-a-panopto-video',
+            'info_dict': {
+                'title': 'Insert a quiz into a Panopto video',
+                'id': 'insert-a-quiz-into-a-panopto-video'
+            },
+            'playlist_count': 1
+        },
     ]

     def report_following_redirect(self, new_url):

@@ -3518,6 +3528,9 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 zype_urls, video_id, video_title, ie=ZypeIE.ie_key())

+        panopto_entries = PanoptoIE._extract_from_webpage(url, webpage)
+        if panopto_entries:
+            return self.playlist_result(panopto_entries, video_id, video_title)
         # Look for HTML5 media
         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
         if entries:
@@ -0,0 +1,663 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import calendar
+from datetime import datetime
+import functools
+import json
+import itertools
+from random import random
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_map as map,
+    compat_parse_qs as parse_qs,
+    compat_str,
+    compat_urllib_parse_urlparse,
+)
+
+from ..utils import (
+    bug_reports_message,
+    ExtractorError,
+    get_first,
+    int_or_none,
+    LazyList,
+    merge_dicts,
+    OnDemandPagedList,
+    orderedSet,
+    srt_subtitles_timecode,
+    traverse_obj,
+    try_get,
+    update_url_query,
+)
+
+import inspect
+if len(try_get(InfoExtractor.report_warning,
+               (lambda x: inspect.getfullargspec(x).args,
+                lambda x: inspect.getargspec(x).args, ), list) or []) <= 2:
+
+    BaseInfoExtractor = InfoExtractor
+
+    class InfoExtractor(BaseInfoExtractor):
+
+        def report_warning(self, warning, only_once=True, _memo=set()):
+            from hashlib import md5
+            if only_once:
+                w_hash = md5(str(warning).encode('utf-8')).hexdigest()
+                if w_hash in _memo:
+                    return
+                _memo.add(w_hash)
+            super(InfoExtractor, self).report_warning(warning)
|
|
||||||
|
@classmethod
|
||||||
|
def _match_valid_url(cls, url):
|
||||||
|
return re.match(cls._VALID_URL, url)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _merge_subtitle_items(subtitle_list1, subtitle_list2):
|
||||||
|
""" Merge subtitle items for one language. Items with duplicated URLs/data
|
||||||
|
will be dropped. """
|
||||||
|
list1_data = {(item.get('url'), item.get('data')) for item in subtitle_list1}
|
||||||
|
ret = list(subtitle_list1)
|
||||||
|
ret.extend(item for item in subtitle_list2 if (item.get('url'), item.get('data')) not in list1_data)
|
||||||
|
return ret
|
||||||
|
|
||||||
|
|
||||||
|
class PanoptoBaseIE(InfoExtractor):
|
||||||
|
BASE_URL_RE = r'(?P<base_url>https?://[\w.-]+\.panopto.(?:com|eu)/Panopto)'
|
||||||
|
|
||||||
|
# see panopto core.js
|
||||||
|
_SUB_LANG_MAPPING = {
|
||||||
|
0: 'en-US',
|
||||||
|
1: 'en-GB',
|
||||||
|
2: 'es-MX',
|
||||||
|
3: 'es-ES',
|
||||||
|
4: 'de-DE',
|
||||||
|
5: 'fr-FR',
|
||||||
|
6: 'nl-NL',
|
||||||
|
7: 'th-TH',
|
||||||
|
8: 'zh-CN',
|
||||||
|
9: 'zh-TW',
|
||||||
|
10: 'ko-KR',
|
||||||
|
11: 'ja-JP',
|
||||||
|
12: 'ru-RU',
|
||||||
|
13: 'pt-PT',
|
||||||
|
14: 'pl-PL',
|
||||||
|
15: 'en-AU',
|
||||||
|
16: 'da-DK',
|
||||||
|
17: 'fi-FI',
|
||||||
|
18: 'hu-HU',
|
||||||
|
19: 'nb-NO',
|
||||||
|
20: 'sv-SE',
|
||||||
|
21: 'it-IT'
|
||||||
|
}
|
||||||
|
|
||||||
|
def _call_api(self, base_url, path, video_id, data=None, fatal=True, **kwargs):
|
||||||
|
response = self._download_json(
|
||||||
|
base_url + path, video_id, data=json.dumps(data).encode('utf8') if data else None,
|
||||||
|
fatal=fatal, headers={'accept': 'application/json', 'content-type': 'application/json'}, **kwargs)
|
||||||
|
if not response:
|
||||||
|
return
|
||||||
|
error_code = traverse_obj(response, 'ErrorCode')
|
||||||
|
if error_code == 2:
|
||||||
|
self.raise_login_required(method='cookies')
|
||||||
|
elif error_code is not None:
|
||||||
|
msg = '%s said: %s' % (self.IE_NAME, response.get('ErrorMessage') or '[no message]')
|
||||||
|
if fatal:
|
||||||
|
raise ExtractorError(msg, video_id=video_id, expected=True)
|
||||||
|
else:
|
||||||
|
self.report_warning(msg, video_id=video_id)
|
||||||
|
return response
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _parse_fragment(url):
|
||||||
|
return dict((k, json.loads(v[0])) for k, v in parse_qs(compat_urllib_parse_urlparse(url).fragment).items())
|
||||||
|
|
||||||
|
|
||||||
|
class PanoptoIE(PanoptoBaseIE):
|
||||||
|
_VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)id=(?P<id>[a-f0-9-]+)'
|
||||||
|
_EMBED_REGEX = [
|
||||||
|
r'''<iframe\b[^>]+\bsrc\s*=\s*(["'])(?P<url>%s/Pages/(?:Viewer|Embed|Sessions/List)\.aspx(?:(?!\1)[\w\W])+)'''
|
||||||
|
% (PanoptoBaseIE.BASE_URL_RE, )]
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
|
||||||
|
'title': 'Panopto for Business - Use Cases',
|
||||||
|
'timestamp': 1459184200,
|
||||||
|
'thumbnail': r're:https://demo\.hosted\.panopto\.com/.+',
|
||||||
|
'upload_date': '20160328',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'cast': [],
|
||||||
|
'chapters': [],
|
||||||
|
'duration': 88.17099999999999,
|
||||||
|
'average_rating': int,
|
||||||
|
'uploader_id': '2db6b718-47a0-4b0b-9e17-ab0b00f42b1e',
|
||||||
|
'channel_id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
|
||||||
|
'channel': 'Showcase Videos'
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=ed01b077-c9e5-4c7b-b8ff-15fa306d7a59',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ed01b077-c9e5-4c7b-b8ff-15fa306d7a59',
|
||||||
|
'title': 'Overcoming Top 4 Challenges of Enterprise Video',
|
||||||
|
'uploader': 'Panopto Support',
|
||||||
|
'timestamp': 1449409251,
|
||||||
|
'thumbnail': r're:https://demo\.hosted\.panopto\.com/.+',
|
||||||
|
'upload_date': '20151206',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'chapters': 'count:12',
|
||||||
|
'cast': ['Panopto Support'],
|
||||||
|
'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c',
|
||||||
|
'average_rating': int,
|
||||||
|
'description': 'md5:4391837802b3fc856dadf630c4b375d1',
|
||||||
|
'duration': 1088.2659999999998,
|
||||||
|
'channel_id': '9f3c1921-43bb-4bda-8b3a-b8d2f05a8546',
|
||||||
|
'channel': 'Webcasts',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# Extra params in URL
|
||||||
|
'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?randomparam=thisisnotreal&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5fa74e93-3d87-4694-b60e-aaa4012214ed',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'duration': 129.513,
|
||||||
|
'cast': ['Kathryn Kelly'],
|
||||||
|
'uploader_id': '316a0a58-7fa2-4cd9-be1c-64270d284a56',
|
||||||
|
'timestamp': 1569845768,
|
||||||
|
'tags': ['Viewer', 'Enterprise'],
|
||||||
|
'chapters': [],
|
||||||
|
'upload_date': '20190930',
|
||||||
|
'thumbnail': r're:https://howtovideos\.hosted\.panopto\.com/.+',
|
||||||
|
'description': 'md5:2d844aaa1b1a14ad0e2601a0993b431f',
|
||||||
|
'title': 'Getting Started: View a Video',
|
||||||
|
'average_rating': int,
|
||||||
|
'uploader': 'Kathryn Kelly',
|
||||||
|
'channel_id': 'fb93bc3c-6750-4b80-a05b-a921013735d3',
|
||||||
|
'channel': 'Getting Started',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# Does not allow normal Viewer.aspx. AUDIO livestream has no url, so should be skipped and only give one stream.
|
||||||
|
'url': 'https://unisa.au.panopto.com/Panopto/Pages/Embed.aspx?id=9d9a0fa3-e99a-4ebd-a281-aac2017f4da4',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '9d9a0fa3-e99a-4ebd-a281-aac2017f4da4',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'cast': ['LTS CLI Script'],
|
||||||
|
'chapters': [],
|
||||||
|
'duration': 2178.45,
|
||||||
|
'description': 'md5:ee5cf653919f55b72bce2dbcf829c9fa',
|
||||||
|
'channel_id': 'b23e673f-c287-4cb1-8344-aae9005a69f8',
|
||||||
|
'average_rating': int,
|
||||||
|
'uploader_id': '38377323-6a23-41e2-9ff6-a8e8004bf6f7',
|
||||||
|
'uploader': 'LTS CLI Script',
|
||||||
|
'timestamp': 1572458134,
|
||||||
|
'title': 'WW2 Vets Interview 3 Ronald Stanley George',
|
||||||
|
'thumbnail': r're:https://unisa\.au\.panopto\.com/.+',
|
||||||
|
'channel': 'World War II Veteran Interviews',
|
||||||
|
'upload_date': '20191030',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# Slides/storyboard
|
||||||
|
'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=a7f12f1d-3872-4310-84b0-f8d8ab15326b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'a7f12f1d-3872-4310-84b0-f8d8ab15326b',
|
||||||
|
'ext': 'mhtml',
|
||||||
|
'timestamp': 1448798857,
|
||||||
|
'duration': 4712.681,
|
||||||
|
'title': 'Cache Memory - CompSci 15-213, Lecture 12',
|
||||||
|
'channel_id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
|
||||||
|
'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c',
|
||||||
|
'upload_date': '20151129',
|
||||||
|
'average_rating': 0,
|
||||||
|
'uploader': 'Panopto Support',
|
||||||
|
'channel': 'Showcase Videos',
|
||||||
|
'description': 'md5:55e51d54233ddb0e6c2ed388ca73822c',
|
||||||
|
'cast': ['ISR Videographer', 'Panopto Support'],
|
||||||
|
'chapters': 'count:28',
|
||||||
|
'thumbnail': r're:https://demo\.hosted\.panopto\.com/.+',
|
||||||
|
},
|
||||||
|
'params': {'format': 'mhtml', 'skip_download': True},
|
||||||
|
'skip': 'Not yet implemented',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=8285224a-9a2b-4957-84f2-acb0000c4ea9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '8285224a-9a2b-4957-84f2-acb0000c4ea9',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'chapters': [],
|
||||||
|
'title': 'Company Policy',
|
||||||
|
'average_rating': 0,
|
||||||
|
'timestamp': 1615058901,
|
||||||
|
'channel': 'Human Resources',
|
||||||
|
'tags': ['HumanResources'],
|
||||||
|
'duration': 1604.243,
|
||||||
|
'thumbnail': r're:https://na-training-1\.hosted\.panopto\.com/.+',
|
||||||
|
'uploader_id': '8e8ba0a3-424f-40df-a4f1-ab3a01375103',
|
||||||
|
'uploader': 'Cait M.',
|
||||||
|
'upload_date': '20210306',
|
||||||
|
'cast': ['Cait M.'],
|
||||||
|
# 'subtitles': {'en-US': [{'ext': 'srt', 'data': 'md5:a3f4d25963fdeace838f327097c13265'}],
|
||||||
|
# 'es-ES': [{'ext': 'srt', 'data': 'md5:57e9dad365fd0fbaf0468eac4949f189'}]},
|
||||||
|
},
|
||||||
|
'params': {'writesubtitles': True, 'skip_download': True}
|
||||||
|
}, {
|
||||||
|
# On Panopto there are two subs: "Default" and en-US. en-US is blank and should be skipped.
|
||||||
|
'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=940cbd41-f616-4a45-b13e-aaf1000c915b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '940cbd41-f616-4a45-b13e-aaf1000c915b',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'subtitles': 'count:1',
|
||||||
|
'title': 'HR Benefits Review Meeting*',
|
||||||
|
'cast': ['Panopto Support'],
|
||||||
|
'chapters': [],
|
||||||
|
'timestamp': 1575024251,
|
||||||
|
'thumbnail': r're:https://na-training-1\.hosted\.panopto\.com/.+',
|
||||||
|
'channel': 'Zoom',
|
||||||
|
'description': 'md5:04f90a9c2c68b7828144abfb170f0106',
|
||||||
|
'uploader': 'Panopto Support',
|
||||||
|
'average_rating': 0,
|
||||||
|
'duration': 409.34499999999997,
|
||||||
|
'uploader_id': 'b6ac04ad-38b8-4724-a004-a851004ea3df',
|
||||||
|
'upload_date': '20191129',
|
||||||
|
|
||||||
|
},
|
||||||
|
'params': {'writesubtitles': True, 'skip_download': True}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://ucc.cloud.panopto.eu/Panopto/Pages/Viewer.aspx?id=0e8484a4-4ceb-4d98-a63f-ac0200b455cb',
|
||||||
|
'only_matching': True
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://brown.hosted.panopto.com/Panopto/Pages/Embed.aspx?id=0b3ff73b-36a0-46c5-8455-aadf010a3638',
|
||||||
|
'only_matching': True
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if PanoptoPlaylistIE.suitable(url) else super(PanoptoIE, cls).suitable(url)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _extract_from_webpage(cls, url, webpage):
|
||||||
|
return LazyList(map(
|
||||||
|
lambda u: cls.url_result(u, cls.ie_key()),
|
||||||
|
orderedSet(m.group('url') for m in itertools.chain(
|
||||||
|
*(re.finditer(embed_re, webpage) for embed_re in cls._EMBED_REGEX)))))
|
||||||
|
|
||||||
|
def _mark_watched(self, base_url, video_id, delivery_info):
|
||||||
|
duration = traverse_obj(delivery_info, ('Delivery', 'Duration'), expected_type=float)
|
||||||
|
invocation_id = delivery_info.get('InvocationId')
|
||||||
|
stream_id = traverse_obj(delivery_info, ('Delivery', 'Streams', Ellipsis, 'PublicID'), get_all=False, expected_type=compat_str)
|
||||||
|
if invocation_id and stream_id and duration:
|
||||||
|
timestamp_str = '/Date(%s000)/' % (calendar.timegm(datetime.utcnow().timetuple()), )
|
||||||
|
data = {
|
||||||
|
'streamRequests': [
|
||||||
|
{
|
||||||
|
'ClientTimeStamp': timestamp_str,
|
||||||
|
'ID': 0,
|
||||||
|
'InvocationID': invocation_id,
|
||||||
|
'PlaybackSpeed': 1,
|
||||||
|
'SecondsListened': duration - 1,
|
||||||
|
'SecondsRejected': 0,
|
||||||
|
'StartPosition': 0,
|
||||||
|
'StartReason': 2,
|
||||||
|
'StopReason': None,
|
||||||
|
'StreamID': stream_id,
|
||||||
|
'TimeStamp': timestamp_str,
|
||||||
|
'UpdatesRejected': 0
|
||||||
|
},
|
||||||
|
]}
|
||||||
|
|
||||||
|
self._download_webpage(
|
||||||
|
base_url + '/Services/Analytics.svc/AddStreamRequests', video_id,
|
||||||
|
fatal=False, data=json.dumps(data).encode('utf8'), headers={'content-type': 'application/json'},
|
||||||
|
note='Marking watched', errnote='Unable to mark watched')
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_chapters(timestamps):
|
||||||
|
chapters = []
|
||||||
|
for timestamp in timestamps or []:
|
||||||
|
caption = timestamp.get('Caption')
|
||||||
|
start, duration = int_or_none(timestamp.get('Time')), int_or_none(timestamp.get('Duration'))
|
||||||
|
if not caption or start is None or duration is None:
|
||||||
|
continue
|
||||||
|
chapters.append({
|
||||||
|
'start_time': start,
|
||||||
|
'end_time': start + duration,
|
||||||
|
'title': caption
|
||||||
|
})
|
||||||
|
return chapters
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_mhtml_formats(base_url, timestamps):
|
||||||
|
image_frags = {}
|
||||||
|
for timestamp in timestamps or []:
|
||||||
|
duration = timestamp.get('Duration')
|
||||||
|
obj_id, obj_sn = timestamp.get('ObjectIdentifier'), timestamp.get('ObjectSequenceNumber'),
|
||||||
|
if timestamp.get('EventTargetType') == 'PowerPoint' and obj_id is not None and obj_sn is not None:
|
||||||
|
image_frags.setdefault('slides', []).append({
|
||||||
|
'url': update_url_query(
|
||||||
|
base_url + '/Pages/Viewer/Image.aspx', {
|
||||||
|
'id': obj_id,
|
||||||
|
'number': obj_sn,
|
||||||
|
}),
|
||||||
|
'duration': duration
|
||||||
|
})
|
||||||
|
|
||||||
|
obj_pid, session_id, abs_time = timestamp.get('ObjectPublicIdentifier'), timestamp.get('SessionID'), timestamp.get('AbsoluteTime')
|
||||||
|
if None not in (obj_pid, session_id, abs_time):
|
||||||
|
image_frags.setdefault('chapter', []).append({
|
||||||
|
'url': update_url_query(
|
||||||
|
base_url + '/Pages/Viewer/Thumb.aspx?isPrimary=false', {
|
||||||
|
'eventTargetPID': obj_pid,
|
||||||
|
'sessionPID': session_id,
|
||||||
|
'number': obj_sn,
|
||||||
|
'absoluteTime': abs_time,
|
||||||
|
}),
|
||||||
|
'duration': duration,
|
||||||
|
})
|
||||||
|
for name, fragments in image_frags.items():
|
||||||
|
yield {
|
||||||
|
'format_id': name,
|
||||||
|
'ext': 'mhtml',
|
||||||
|
'protocol': 'mhtml',
|
||||||
|
'acodec': 'none',
|
||||||
|
'vcodec': 'none',
|
||||||
|
'url': 'about:invalid',
|
||||||
|
'fragments': fragments
|
||||||
|
}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _json2srt(data, delivery):
|
||||||
|
SRT_CAPTION_FMT = '{0}\n{1} --> {2}\n{3}'
|
||||||
|
|
||||||
|
def gen_lines(dat, deliv):
|
||||||
|
for i, line in enumerate(dat):
|
||||||
|
start_time = line['Time']
|
||||||
|
duration = line.get('Duration')
|
||||||
|
if duration:
|
||||||
|
end_time = start_time + duration
|
||||||
|
else:
|
||||||
|
end_time = traverse_obj(dat, (i + 1, 'Time')) or deliv['Duration']
|
||||||
|
yield SRT_CAPTION_FMT.format(
|
||||||
|
i + 1, srt_subtitles_timecode(start_time), srt_subtitles_timecode(end_time), line['Caption'])
|
||||||
|
return '\n\n'.join(gen_lines(data, delivery))
|
||||||
|
    def _get_subtitles(self, base_url, video_id, delivery):
        subtitles = {}
        for lang in delivery.get('AvailableLanguages') or []:
            response = self._call_api(
                base_url, '/Pages/Viewer/DeliveryInfo.aspx', video_id, fatal=False,
                note='Downloading captions JSON metadata', query={
                    'deliveryId': video_id,
                    'getCaptions': True,
                    'language': compat_str(lang),
                    'responseType': 'json'
                }
            )
            if not isinstance(response, list):
                continue
            subtitles.setdefault(self._SUB_LANG_MAPPING.get(lang) or 'default', []).append({
                'ext': 'srt',
                'data': self._json2srt(response, delivery),
            })
        return subtitles

    def _extract_streams_formats_and_subtitles(self, video_id, streams, **fmt_kwargs):
        formats = []
        subtitles = {}
        for stream in streams or []:
            stream_formats = []
            http_stream_url = stream.get('StreamHttpUrl')
            stream_url = stream.get('StreamUrl')

            if http_stream_url:
                stream_formats.append({'url': http_stream_url})

            if stream_url:
                media_type = stream.get('ViewerMediaFileTypeName')
                if media_type in ('hls', ):
                    # m3u8_formats, stream_subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id)
                    m3u8_formats = self._extract_m3u8_formats(stream_url, video_id)
                    stream_formats.extend(m3u8_formats)
                    # subtitles = self._merge_subtitles(subtitles, stream_subtitles)
                else:
                    stream_formats.append({
                        'url': stream_url
                    })
            for fmt in stream_formats:
                fmt.update({'format_note': stream.get('Tag'), })
                fmt.update(fmt_kwargs)
            formats.extend(stream_formats)

        return formats, subtitles

    def _real_extract(self, url):
        base_url, video_id = self._match_valid_url(url).group('base_url', 'id')
        delivery_info = self._call_api(
            base_url, '/Pages/Viewer/DeliveryInfo.aspx', video_id,
            query={
                'deliveryId': video_id,
                'invocationId': '',
                'isLiveNotes': 'false',
                'refreshAuthCookie': 'true',
                'isActiveBroadcast': 'false',
                'isEditing': 'false',
                'isKollectiveAgentInstalled': 'false',
                'isEmbed': 'false',
                'responseType': 'json',
            }
        )

        delivery = delivery_info['Delivery']
        session_start_time = int_or_none(delivery.get('SessionStartTime'))
        timestamps = delivery.get('Timestamps')

        # Podcast stream is usually the combined streams. We will prefer that by default.
        podcast_formats, podcast_subtitles = self._extract_streams_formats_and_subtitles(
            video_id, delivery.get('PodcastStreams'), format_note='PODCAST')

        streams_formats, streams_subtitles = self._extract_streams_formats_and_subtitles(
            video_id, delivery.get('Streams'), preference=-10)

        formats = podcast_formats + streams_formats
        formats.extend(self._extract_mhtml_formats(base_url, timestamps))
        subtitles = self._merge_subtitles(podcast_subtitles, streams_subtitles)
        subtitles = self._merge_subtitles(subtitles, self.extract_subtitles(base_url, video_id, delivery))

        self._sort_formats(formats)
        self.mark_watched(base_url, video_id, delivery_info)

        return {
            'id': video_id,
            'title': delivery.get('SessionName'),
            'cast': traverse_obj(delivery, ('Contributors', Ellipsis, 'DisplayName'), default=[], expected_type=lambda x: x or None),
            'timestamp': session_start_time - 11640000000 if session_start_time else None,
            'duration': delivery.get('Duration'),
            'thumbnail': update_url_query(
                base_url + '/Services/FrameGrabber.svc/FrameRedirect?mode=Delivery', {
                    'objectId': video_id,
                    'random': random(),
                }),
            'average_rating': delivery.get('AverageRating'),
            'chapters': self._extract_chapters(timestamps),
            'uploader': delivery.get('OwnerDisplayName') or None,
            'uploader_id': delivery.get('OwnerId'),
            'description': delivery.get('SessionAbstract'),
            'tags': traverse_obj(delivery, ('Tags', Ellipsis, 'Content')),
            'channel_id': delivery.get('SessionGroupPublicID'),
            'channel': traverse_obj(delivery, 'SessionGroupLongName', 'SessionGroupShortName', get_all=False),
            'formats': formats,
            'subtitles': subtitles
        }


class PanoptoPlaylistIE(PanoptoBaseIE):
    _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)pid=(?P<id>[a-f0-9-]+)'
    _TESTS = [
        {
            'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=f3b39fcf-882f-4849-93d6-a9f401236d36&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true',
            'info_dict': {
                'title': 'Featured Video Tutorials',
                'id': 'f3b39fcf-882f-4849-93d6-a9f401236d36',
            },
            'playlist_mincount': 34,  # was 36
        },
        {
            'url': 'https://utsa.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=e2900555-3ad4-4bdb-854d-ad2401686190',
            'info_dict': {
                'title': 'Library Website Introduction Playlist',
                'id': 'e2900555-3ad4-4bdb-854d-ad2401686190',
                'description': 'md5:f958bca50a1cbda15fdc1e20d32b3ecb',
            },
            'playlist_mincount': 4
        },
    ]

    def _entries(self, base_url, playlist_id, session_list_id):
        session_list_info = self._call_api(
            base_url,
            '/Api/SessionLists/%s?collections[0].maxCount=500&collections[0].name=items' % (session_list_id, ),
            playlist_id)

        items = session_list_info['Items']
        for item in items:
            if item.get('TypeName') != 'Session':
                self.report_warning('Got an item in the playlist that is not a Session' + bug_reports_message(), only_once=True)
                continue
            yield merge_dicts(
                self.url_result(item.get('ViewerUri'), item.get('Id'), item.get('Name')), {
                    'description': item.get('Description'),
                    'duration': item.get('Duration'),
                    'channel': traverse_obj(item, ('Parent', 'Name')),
                    'channel_id': traverse_obj(item, ('Parent', 'Id'))
                })

    def _real_extract(self, url):
        base_url, playlist_id = self._match_valid_url(url).group('base_url', 'id')

        video_id = get_first(parse_qs(url), 'id')
        if video_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(update_url_query(base_url + '/Pages/Viewer.aspx', {'id': video_id}), ie_key=PanoptoIE.ie_key(), video_id=video_id)
            else:
                self.to_screen('Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}'.format(**locals()))

        playlist_info = self._call_api(base_url, '/Api/Playlists/' + playlist_id, playlist_id)
        return self.playlist_result(
            self._entries(base_url, playlist_id, playlist_info['SessionListId']),
            playlist_id=playlist_id, playlist_title=playlist_info.get('Name'),
            playlist_description=playlist_info.get('Description'))


class PanoptoListIE(PanoptoBaseIE):
    _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/Sessions/List\.aspx'
    _PAGE_SIZE = 250
    _TESTS = [
        {
            'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%22e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a%22',
            'info_dict': {
                'id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
                'title': 'Showcase Videos'
            },
            'playlist_mincount': 140
        },
        {
            'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#view=2&maxResults=250',
            'info_dict': {
                'id': 'panopto_list',
                'title': 'panopto_list'
            },
            'playlist_mincount': 300
        },
        {
            # Folder that contains 8 folders and a playlist
            'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx?noredirect=true#folderID=%224b9de7ae-0080-4158-8496-a9ba01692c2e%22',
            'info_dict': {
                'id': '4b9de7ae-0080-4158-8496-a9ba01692c2e',
                'title': 'Video Tutorials'
            },
            'playlist_mincount': 9
        }
    ]

    def _fetch_page(self, base_url, query_params, display_id, page):
        params = merge_dicts({
            'page': page,
            'maxResults': self._PAGE_SIZE,
        }, query_params, {
            'sortColumn': 1,
            'getFolderData': True,
            'includePlaylists': True,
        })

        response = self._call_api(
            base_url, '/Services/Data.svc/GetSessions', '%s page %d' % (display_id, page + 1),
            data={'queryParameters': params}, fatal=False)

        for result in get_first(response, 'Results', default=[]):
            # This could be a video, playlist (or maybe something else)
            item_id = result.get('DeliveryID')
            yield merge_dicts(
                self.url_result(
                    traverse_obj(result, 'ViewerUrl', 'EmbedUrl', get_all=False)
                    or update_url_query(base_url + '/Pages/Viewer.aspx', {'id': item_id}),
                    item_id, result.get('SessionName')), {
                    'duration': result.get('Duration'),
                    'channel': result.get('FolderName'),
                    'channel_id': result.get('FolderID'),
                })

        for folder in get_first(response, 'Subfolders', default=[]):
            folder_id = folder.get('ID')
            yield self.url_result(
                '%s/Pages/Sessions/List.aspx#folderID=%s' % (base_url, folder_id),
                ie_key=PanoptoListIE.ie_key(), video_id=folder_id, title=folder.get('Name'))

    def _extract_folder_metadata(self, base_url, folder_id):
        response = self._call_api(
            base_url, '/Services/Data.svc/GetFolderInfo', folder_id,
            data={'folderID': folder_id}, fatal=False)
        return {
            'title': get_first(response, 'Name', default=[])
        }

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        base_url = mobj.group('base_url')

        query_params = self._parse_fragment(url)
        folder_id, display_id = query_params.get('folderID'), 'panopto_list'

        if query_params.get('isSubscriptionsPage'):
            display_id = 'subscriptions'
            if not query_params.get('subscribableTypes'):
                query_params['subscribableTypes'] = [0, 1, 2]
        elif query_params.get('isSharedWithMe'):
            display_id = 'sharedwithme'
        elif folder_id:
            display_id = folder_id

        query = query_params.get('query')
        if query:
            display_id += ': query "%s"' % (query, )

        info = self.playlist_result(
            OnDemandPagedList(
                functools.partial(self._fetch_page, base_url, query_params, display_id), self._PAGE_SIZE),
            playlist_id=display_id,
            playlist_title=display_id)

        if folder_id:
            info.update(self._extract_folder_metadata(base_url, folder_id))

        return info
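A note on the timestamp arithmetic in _real_extract above: subtracting 11640000000 suggests Panopto reports SessionStartTime as seconds since a circa-1601 epoch (the Windows/.NET FILETIME base) rather than the Unix epoch; the exact 1601-to-1970 offset is 11644473600 seconds, so the extractor's constant appears to be a rounded approximation of it. A quick check:

from datetime import datetime

# seconds between the 1601-01-01 epoch (Windows/.NET FILETIME base)
# and the Unix epoch of 1970-01-01
offset = int((datetime(1970, 1, 1) - datetime(1601, 1, 1)).total_seconds())
print(offset)  # 11644473600 -- close to the 11640000000 subtracted above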
youtube_dl/extractor/vbox7.py
@@ -7,6 +7,7 @@ import time
 from .common import InfoExtractor
 from ..compat import compat_kwargs
 from ..utils import (
+    base_url,
     determine_ext,
     ExtractorError,
     float_or_none,
@@ -14,6 +15,7 @@ from ..utils import (
     T,
     traverse_obj,
     txt_or_none,
+    url_basename,
     url_or_none,
 )
@@ -33,8 +35,8 @@ class Vbox7IE(InfoExtractor):
     '''
     _EMBED_REGEX = [r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)']
     _GEO_COUNTRIES = ['BG']
-    _GEO_BYPASS = False
     _TESTS = [{
+        # the http: URL just redirects here
         'url': 'https://vbox7.com/play:0946fff23c',
         'md5': '50ca1f78345a9c15391af47d8062d074',
         'info_dict': {
@@ -42,17 +44,19 @@ class Vbox7IE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Борисов: Притеснен съм за бъдещето на България',
             'description': 'По думите му е опасно страната ни да бъде обявена за "сигурна"',
-            'thumbnail': r're:^https?://.*\.jpg$',
             'timestamp': 1470982814,
             'upload_date': '20160812',
             'uploader': 'zdraveibulgaria',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'view_count': int,
+            'duration': 2640,
         },
         'expected_warnings': [
             'Unable to download webpage',
         ],
     }, {
         'url': 'http://vbox7.com/play:249bb972c2',
-        'md5': 'aaf19465e37ec0b30b918df83ec32c50',
+        'md5': '99f65c0c9ef9b682b97313e052734c3f',
         'info_dict': {
             'id': '249bb972c2',
             'ext': 'mp4',
@@ -61,7 +65,11 @@ class Vbox7IE(InfoExtractor):
             'timestamp': 1360215023,
             'upload_date': '20130207',
             'uploader': 'svideteliat_ot_varshava',
+            'thumbnail': 'https://i49.vbox7.com/o/249/249bb972c20.jpg',
+            'view_count': int,
+            'duration': 83,
         },
+        'expected_warnings': ['Failed to download m3u8 information'],
     }, {
         'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
         'only_matching': True,
@@ -76,6 +84,9 @@ class Vbox7IE(InfoExtractor):
         if mobj:
             return mobj.group('url')

+    # specialisation to transform what looks like ld+json that
+    # may contain invalid character combinations
+
     # transform_source=None, fatal=True
     def _parse_json(self, json_string, video_id, *args, **kwargs):
         if '"@context"' in json_string[:30]:
@@ -103,49 +114,64 @@ class Vbox7IE(InfoExtractor):
         now = time.time()
         response = self._download_json(
-            'https://www.vbox7.com/aj/player/item/options?vid=%s' % (video_id,),
-            video_id, headers={'Referer': url})
+            'https://www.vbox7.com/aj/player/item/options', video_id,
+            query={'vid': video_id}, headers={'Referer': url})
         # estimate time to which possible `ago` member is relative
         now = now + 0.5 * (time.time() - now)

-        if 'error' in response:
+        if traverse_obj(response, 'error'):
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, response['error']), expected=True)

-        video_url = traverse_obj(response, ('options', 'src', T(url_or_none)))
-
-        if '/na.mp4' in video_url or '':
+        src_url = traverse_obj(response, ('options', 'src', T(url_or_none))) or ''
+
+        fmt_base = url_basename(src_url).rsplit('.', 1)[0].rsplit('_', 1)[0]
+        if fmt_base in ('na', 'vn'):
             self.raise_geo_restricted(countries=self._GEO_COUNTRIES)

-        ext = determine_ext(video_url)
+        ext = determine_ext(src_url)
         if ext == 'mpd':
-            # In case MPD cannot be parsed, or anyway, get mp4 combined
-            # formats usually provided to Safari, iOS, and old Windows
+            # extract MPD
             try:
                 formats, subtitles = self._extract_mpd_formats_and_subtitles(
-                    video_url, video_id, 'dash', fatal=False)
-            except KeyError:
+                    src_url, video_id, 'dash', fatal=False)
+            except KeyError:  # fatal doesn't catch this
                 self.report_warning('Failed to parse MPD manifest')
                 formats, subtitles = [], {}
+        elif ext != 'm3u8':
+            formats = [{
+                'url': src_url,
+            }] if src_url else []
+            subtitles = {}
+
+        if src_url:
+            # possibly extract HLS, based on https://github.com/yt-dlp/yt-dlp/pull/9100
+            fmt_base = base_url(src_url) + fmt_base
+            # prepare for _extract_m3u8_formats_and_subtitles()
+            # hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
+            hls_formats = self._extract_m3u8_formats(
+                '{0}.m3u8'.format(fmt_base), video_id, m3u8_id='hls', fatal=False)
+            formats.extend(hls_formats)
+            # self._merge_subtitles(hls_subs, target=subtitles)
+
+            # In case MPD/HLS cannot be parsed, or anyway, get mp4 combined
+            # formats usually provided to Safari, iOS, and old Windows
             video = response['options']
             resolutions = (1080, 720, 480, 240, 144)
-            highest_res = traverse_obj(video, ('highestRes', T(int))) or resolutions[0]
-            for res in traverse_obj(video, ('resolutions', lambda _, r: int(r) > 0)) or resolutions:
-                if res > highest_res:
-                    continue
-                formats.append({
-                    'url': video_url.replace('.mpd', '_%d.mp4' % res),
-                    'format_id': '%dp' % res,
+            highest_res = traverse_obj(video, (
+                'highestRes', T(int))) or resolutions[0]
+            resolutions = traverse_obj(video, (
+                'resolutions', lambda _, r: highest_res >= int(r) > 0)) or resolutions
+            mp4_formats = traverse_obj(resolutions, (
+                Ellipsis, T(lambda res: {
+                    'url': '{0}_{1}.mp4'.format(fmt_base, res),
+                    'format_id': 'http-{0}'.format(res),
                     'height': res,
-                })
+                })))
             # if above formats are flaky, enable the line below
-            # self._check_formats(formats, video_id)
-        else:
-            formats = [{
-                'url': video_url,
-            }]
-            subtitles = {}
+            # self._check_formats(mp4_formats, video_id)
+            formats.extend(mp4_formats)

         self._sort_formats(formats)

         webpage = self._download_webpage(url, video_id, fatal=False) or ''
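To make the fmt_base handling in the Vbox7 change concrete: the basename of the source URL is stripped of its extension and any _NNN suffix, an 'na' or 'vn' stem marks the geo-restriction placeholder, and the same stem is then reused to guess HLS and progressive MP4 URLs. A sketch using the youtube_dl.utils helpers and an illustrative URL shape (not necessarily a real vbox7 path):

from youtube_dl.utils import base_url, url_basename

src_url = 'https://i49.vbox7.com/dash/2/249bb972c2.mpd'  # assumed URL shape
fmt_base = url_basename(src_url).rsplit('.', 1)[0].rsplit('_', 1)[0]
print(fmt_base)  # 249bb972c2 -- 'na' or 'vn' here would mean geo-restricted
full_base = base_url(src_url) + fmt_base
print('{0}.m3u8'.format(full_base))       # HLS manifest candidate
print('{0}_{1}.mp4'.format(full_base, 720))  # progressive MP4 candidate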
youtube_dl/extractor/videa.py
@@ -6,22 +6,31 @@ import re
 import string

 from .common import InfoExtractor
+from ..compat import (
+    compat_b64decode,
+    compat_ord,
+    compat_struct_pack,
+)
 from ..utils import (
     ExtractorError,
     int_or_none,
     mimetype2ext,
     parse_codecs,
+    parse_qs,
     update_url_query,
     urljoin,
     xpath_element,
     xpath_text,
 )
-from ..compat import (
-    compat_b64decode,
-    compat_ord,
-    compat_struct_pack,
-    compat_urlparse,
-)
+
+
+def compat_random_choices(population, *args, **kwargs):
+    # weights=None, *, cum_weights=None, k=1
+    # limited implementation needed here
+    weights = args[0] if args else kwargs.get('weights')
+    assert all(w is None for w in (weights, kwargs.get('cum_weights')))
+    k = kwargs.get('k', 1)
+    return ''.join(random.choice(population) for _ in range(k))


 class VideaIE(InfoExtractor):
@@ -35,6 +44,7 @@ class VideaIE(InfoExtractor):
         )
         (?P<id>[^?#&]+)
     '''
+    _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1']
     _TESTS = [{
         'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ',
         'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
@@ -44,6 +54,7 @@ class VideaIE(InfoExtractor):
             'title': 'Az őrült kígyász 285 kígyót enged szabadon',
             'thumbnail': r're:^https?://.*',
             'duration': 21,
+            'age_limit': 0,
         },
     }, {
         'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
@@ -54,6 +65,7 @@ class VideaIE(InfoExtractor):
             'title': 'Supercars előzés',
             'thumbnail': r're:^https?://.*',
             'duration': 64,
+            'age_limit': 0,
         },
     }, {
         'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
@@ -64,6 +76,7 @@ class VideaIE(InfoExtractor):
             'title': 'Az őrült kígyász 285 kígyót enged szabadon',
             'thumbnail': r're:^https?://.*',
             'duration': 21,
+            'age_limit': 0,
         },
     }, {
         'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
@@ -80,11 +93,14 @@ class VideaIE(InfoExtractor):
     }]
     _STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'

-    @staticmethod
-    def _extract_urls(webpage):
-        return [url for _, url in re.findall(
-            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
-            webpage)]
+    @classmethod
+    def _extract_urls(cls, webpage):
+        def yield_urls():
+            for pattern in cls._EMBED_REGEX:
+                for m in re.finditer(pattern, webpage):
+                    yield m.group('url')
+
+        return list(yield_urls())

     @staticmethod
     def rc4(cipher_text, key):
@@ -130,13 +146,13 @@ class VideaIE(InfoExtractor):
         for i in range(0, 32):
             result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]

-        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query)
-        random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
+        query = parse_qs(player_url)
+        random_seed = ''.join(compat_random_choices(string.ascii_letters + string.digits, k=8))
         query['_s'] = random_seed
         query['_t'] = result[:16]

         b64_info, handle = self._download_webpage_handle(
-            'http://videa.hu/videaplayer_get_xml.php', video_id, query=query)
+            'http://videa.hu/player/xml', video_id, query=query)
         if b64_info.startswith('<?xml'):
             info = self._parse_xml(b64_info, video_id)
         else:
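The compat_random_choices shim introduced above stands in for random.choices(), which only exists on Python 3.6+, while youtube-dl still supports older interpreters. A quick equivalence check (the random.choices line assumes Python 3.6+):

import random
import string

alphabet = string.ascii_letters + string.digits
# Python 3.6+ stdlib call the shim imitates:
seed_a = ''.join(random.choices(alphabet, k=8))
# portable spelling used before the shim existed:
seed_b = ''.join(random.choice(alphabet) for _ in range(8))
print(len(seed_a), len(seed_b))  # 8 8 -- both 8-char [A-Za-z0-9] seeds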
youtube_dl/extractor/vimeo.py
@@ -673,8 +673,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
                     raise

         if '//player.vimeo.com/video/' in url:
-            config = self._parse_json(self._search_regex(
-                r'(?s)\b(?:playerC|c)onfig\s*=\s*({.+?})\s*[;\n]', webpage, 'info section'), video_id)
+            config = self._search_json(
+                r'\b(?:playerC|c)onfig\s*=', webpage, 'info section', video_id)
             if config.get('view') == 4:
                 config = self._verify_player_video_password(
                     redirect_url, video_id, headers)
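Motivation for the _search_json change above: the old lazy ({.+?}) capture stops at the first '}' that happens to be followed by ';', which truncates a config whose string values contain such a sequence, whereas _search_json balances braces from the start pattern onwards. A self-contained illustration with made-up markup (not Vimeo's actual page):

import json
import re

page = 'var config = {"title": "end};", "view": 4};'
m = re.search(r'\bconfig\s*=\s*({.+?})\s*[;\n]', page)
print(m.group(1))  # {"title": "end}  -- cut off inside the string value
try:
    json.loads(m.group(1))
except ValueError as e:
    print('lazy capture is not valid JSON:', e)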
youtube_dl/extractor/xfileshare.py
@@ -4,20 +4,28 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..compat import compat_chr
+from ..compat import (
+    compat_chr,
+    compat_zip as zip,
+)
 from ..utils import (
+    clean_html,
     decode_packed_codes,
     determine_ext,
     ExtractorError,
+    get_element_by_id,
     int_or_none,
-    js_to_json,
+    merge_dicts,
+    T,
+    traverse_obj,
+    url_or_none,
     urlencode_postdata,
 )


 # based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
 def aa_decode(aa_code):
-    symbol_table = [
+    symbol_table = (
         ('7', '((゚ー゚) + (o^_^o))'),
         ('6', '((o^_^o) +(o^_^o))'),
         ('5', '((゚ー゚) + (゚Θ゚))'),
@@ -26,84 +34,180 @@ def aa_decode(aa_code):
         ('3', '(o^_^o)'),
         ('1', '(゚Θ゚)'),
         ('0', '(c^_^o)'),
-    ]
+        ('+', ''),
+    )
     delim = '(゚Д゚)[゚ε゚]+'
-    ret = ''
-    for aa_char in aa_code.split(delim):
-        for val, pat in symbol_table:
-            aa_char = aa_char.replace(pat, val)
-        aa_char = aa_char.replace('+ ', '')
-        m = re.match(r'^\d+', aa_char)
-        if m:
-            ret += compat_chr(int(m.group(0), 8))
-        else:
-            m = re.match(r'^u([\da-f]+)', aa_char)
-            if m:
-                ret += compat_chr(int(m.group(1), 16))
-    return ret
+
+    def chr_from_code(c):
+        for val, pat in symbol_table:
+            c = c.replace(pat, val)
+        if c.startswith(('u', 'U')):
+            base = 16
+            c = c[1:]
+        else:
+            base = 10
+        c = int_or_none(c, base=base)
+        return '' if c is None else compat_chr(c)
+
+    return ''.join(
+        chr_from_code(aa_char)
+        for aa_char in aa_code.split(delim))


 class XFileShareIE(InfoExtractor):
     _SITES = (
-        (r'aparat\.cam', 'Aparat'),
-        (r'clipwatching\.com', 'ClipWatching'),
-        (r'gounlimited\.to', 'GoUnlimited'),
-        (r'govid\.me', 'GoVid'),
-        (r'holavid\.com', 'HolaVid'),
-        (r'streamty\.com', 'Streamty'),
-        (r'thevideobee\.to', 'TheVideoBee'),
-        (r'uqload\.com', 'Uqload'),
-        (r'vidbom\.com', 'VidBom'),
-        (r'vidlo\.us', 'vidlo'),
-        (r'vidlocker\.xyz', 'VidLocker'),
-        (r'vidshare\.tv', 'VidShare'),
-        (r'vup\.to', 'VUp'),
+        # status check 2024-02: site availability, G site: search
+        (r'aparat\.cam', 'Aparat'),  # Cloudflare says host error 522, apparently changed to wolfstreeam.tv
+        (r'filemoon\.sx/.', 'FileMoon'),
+        (r'gounlimited\.to', 'GoUnlimited'),  # no media pages listed
+        (r'govid\.me', 'GoVid'),  # no media pages listed
+        (r'highstream\.tv', 'HighStream'),  # clipwatching.com redirects here
+        (r'holavid\.com', 'HolaVid'),  # Cloudflare says host error 522
+        # (r'streamty\.com', 'Streamty'),  # no media pages listed, connection timeout
+        # (r'thevideobee\.to', 'TheVideoBee'),  # no pages listed, refuses connection
+        (r'uqload\.to', 'Uqload'),  # .com, .co redirect here
+        (r'(?:vedbam\.xyz|vadbam.net)', 'V?dB?m'),  # vidbom.com redirects here, but no valid media pages listed
+        (r'vidlo\.us', 'vidlo'),  # no valid media pages listed
+        (r'vidlocker\.xyz', 'VidLocker'),  # no media pages listed
+        (r'(?:w\d\.)?viidshar\.com', 'VidShare'),  # vidshare.tv redirects here
+        # (r'vup\.to', 'VUp'),  # domain not found
         (r'wolfstream\.tv', 'WolfStream'),
-        (r'xvideosharing\.com', 'XVideoSharing'),
+        (r'xvideosharing\.com', 'XVideoSharing'),  # just started showing 'maintenance mode'
     )

-    IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
+    IE_DESC = 'XFileShare-based sites: %s' % ', '.join(list(zip(*_SITES))[1])
     _VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
                   % '|'.join(site for site in list(zip(*_SITES))[0]))
+    _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1' % '|'.join(site for site in list(zip(*_SITES))[0])]

     _FILE_NOT_FOUND_REGEXES = (
         r'>(?:404 - )?File Not Found<',
         r'>The file was removed by administrator<',
     )
+    _TITLE_REGEXES = (
+        r'style="z-index: [0-9]+;">([^<]+)</span>',
+        r'<td nowrap>([^<]+)</td>',
+        r'h4-fine[^>]*>([^<]+)<',
+        r'>Watch (.+)[ <]',
+        r'<h2 class="video-page-head">([^<]+)</h2>',
+        r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<',  # streamin.to (dead)
+        r'title\s*:\s*"([^"]+)"',  # govid.me
+    )
+    _SOURCE_URL_REGEXES = (
+        r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
+        r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
+        r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
+        r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
+    )
+    _THUMBNAIL_REGEXES = (
+        r'<video[^>]+poster="([^"]+)"',
+        r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
+    )

     _TESTS = [{
-        'url': 'http://xvideosharing.com/fq65f94nd2ve',
-        'md5': '4181f63957e8fe90ac836fa58dc3c8a6',
+        'note': 'link in `sources`',
+        'url': 'https://uqload.to/dcsu06gdb45o',
+        'md5': '7f8db187b254379440bf4fcad094ae86',
         'info_dict': {
-            'id': 'fq65f94nd2ve',
+            'id': 'dcsu06gdb45o',
             'ext': 'mp4',
-            'title': 'sample',
-            'thumbnail': r're:http://.*\.jpg',
+            'title': 'f2e31015957e74c8c8427982e161c3fc mp4',
+            'thumbnail': r're:https://.*\.jpg'
+        },
+        'params': {
+            'nocheckcertificate': True,
+        },
+        'expected_warnings': ['Unable to extract JWPlayer data'],
+    }, {
+        'note': 'link in decoded `sources`',
+        'url': 'https://xvideosharing.com/1tlg6agrrdgc',
+        'md5': '2608ce41932c1657ae56258a64e647d9',
+        'info_dict': {
+            'id': '1tlg6agrrdgc',
+            'ext': 'mp4',
+            'title': '0121',
+            'thumbnail': r're:https?://.*\.jpg',
+        },
+        'skip': 'This server is in maintenance mode.',
+    }, {
+        'note': 'JWPlayer link in un-p,a,c,k,e,d JS',
+        'url': 'https://filemoon.sx/e/dw40rxrzruqz',
+        'md5': '5a713742f57ac4aef29b74733e8dda01',
+        'info_dict': {
+            'id': 'dw40rxrzruqz',
+            'title': 'dw40rxrzruqz',
+            'ext': 'mp4'
+        },
+    }, {
+        'note': 'JWPlayer link in un-p,a,c,k,e,d JS',
+        'url': 'https://vadbam.net/6lnbkci96wly.html',
+        'md5': 'a1616800076177e2ac769203957c54bc',
+        'info_dict': {
+            'id': '6lnbkci96wly',
+            'title': 'Heart Crime S01 E03 weciima autos',
+            'ext': 'mp4'
+        },
+    }, {
+        'note': 'JWPlayer link in clear',
+        'url': 'https://w1.viidshar.com/nnibe0xf0h79.html',
+        'md5': 'f0a580ce9df06cc61b4a5c979d672367',
+        'info_dict': {
+            'id': 'nnibe0xf0h79',
+            'title': 'JaGa 68ar',
+            'ext': 'mp4'
+        },
+        'params': {
+            'skip_download': 'ffmpeg',
+        },
+        'expected_warnings': ['hlsnative has detected features it does not support'],
+    }, {
+        'note': 'JWPlayer link in clear',
+        'url': 'https://wolfstream.tv/a3drtehyrg52.html',
+        'md5': '1901d86a79c5e0c6a51bdc9a4cfd3769',
+        'info_dict': {
+            'id': 'a3drtehyrg52',
+            'title': 'NFL 2023 W04 DET@GB',
+            'ext': 'mp4'
         },
     }, {
         'url': 'https://aparat.cam/n4d6dh0wvlpr',
         'only_matching': True,
     }, {
-        'url': 'https://wolfstream.tv/nthme29v9u2x',
+        'url': 'https://uqload.to/ug5somm0ctnk.html',
+        'only_matching': True,
+    }, {
+        'url': 'https://highstream.tv/2owiyz3sjoux',
+        'only_matching': True,
+    }, {
+        'url': 'https://vedbam.xyz/6lnbkci96wly.html',
         'only_matching': True,
     }]

-    @staticmethod
-    def _extract_urls(webpage):
-        return [
-            mobj.group('url')
-            for mobj in re.finditer(
-                r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1'
-                % '|'.join(site for site in list(zip(*XFileShareIE._SITES))[0]),
-                webpage)]
+    @classmethod
+    def _extract_urls(cls, webpage):
+
+        def yield_urls():
+            for regex in cls._EMBED_REGEX:
+                for mobj in re.finditer(regex, webpage):
+                    yield mobj.group('url')
+
+        return list(yield_urls())

     def _real_extract(self, url):
-        host, video_id = re.match(self._VALID_URL, url).groups()
+        host, video_id = self._match_valid_url(url).group('host', 'id')

-        url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)
+        url = 'https://%s/%s' % (
+            host,
+            'embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)
         webpage = self._download_webpage(url, video_id)
+        container_div = get_element_by_id('container', webpage) or webpage

-        if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):
+        if self._search_regex(
+                r'>This server is in maintenance mode\.', container_div,
+                'maint error', group=0, default=None):
+            raise ExtractorError(clean_html(container_div), expected=True)
+        if self._search_regex(
+                self._FILE_NOT_FOUND_REGEXES, container_div,
+                'missing video error', group=0, default=None):
             raise ExtractorError('Video %s does not exist' % video_id, expected=True)

         fields = self._hidden_inputs(webpage)
@@ -122,59 +226,43 @@ class XFileShareIE(InfoExtractor):
             'Content-type': 'application/x-www-form-urlencoded',
         })

-        title = (self._search_regex(
-            (r'style="z-index: [0-9]+;">([^<]+)</span>',
-             r'<td nowrap>([^<]+)</td>',
-             r'h4-fine[^>]*>([^<]+)<',
-             r'>Watch (.+)[ <]',
-             r'<h2 class="video-page-head">([^<]+)</h2>',
-             r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<',  # streamin.to
-             r'title\s*:\s*"([^"]+)"'),  # govid.me
-            webpage, 'title', default=None) or self._og_search_title(
-            webpage, default=None) or video_id).strip()
+        title = (
+            self._search_regex(self._TITLE_REGEXES, webpage, 'title', default=None)
+            or self._og_search_title(webpage, default=None)
+            or video_id).strip()

-        for regex, func in (
-                (r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes),
-                (r'(゚.+)', aa_decode)):
-            obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
-            if obf_code:
-                webpage = webpage.replace(obf_code, func(obf_code))
+        obf_code = True
+        while obf_code:
+            for regex, func in (
+                    (r'(?s)(?<!-)\b(eval\(function\(p,a,c,k,e,d\)\{(?:(?!</script>).)+\)\))',
+                     decode_packed_codes),
+                    (r'(゚.+)', aa_decode)):
+                obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
+                if obf_code:
+                    webpage = webpage.replace(obf_code, func(obf_code))
+                    break

-        formats = []
-
-        jwplayer_data = self._search_regex(
-            [
-                r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);',
-                r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);',
-            ], webpage,
-            'jwplayer data', default=None)
-        if jwplayer_data:
-            jwplayer_data = self._parse_json(
-                jwplayer_data.replace(r"\'", "'"), video_id, js_to_json)
-            if jwplayer_data:
-                formats = self._parse_jwplayer_data(
-                    jwplayer_data, video_id, False,
-                    m3u8_id='hls', mpd_id='dash')['formats']
-
-        if not formats:
-            urls = []
-            for regex in (
-                    r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
-                    r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
-                    r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
-                    r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
+        jwplayer_data = self._find_jwplayer_data(
+            webpage.replace(r'\'', '\''), video_id)
+        result = self._parse_jwplayer_data(
+            jwplayer_data, video_id, require_title=False,
+            m3u8_id='hls', mpd_id='dash')
+
+        if not traverse_obj(result, 'formats'):
+            if jwplayer_data:
+                self.report_warning(
+                    'Failed to extract JWPlayer formats', video_id=video_id)
+            urls = set()
+            for regex in self._SOURCE_URL_REGEXES:
                 for mobj in re.finditer(regex, webpage):
-                    video_url = mobj.group('url')
-                    if video_url not in urls:
-                        urls.append(video_url)
+                    urls.add(mobj.group('url'))

             sources = self._search_regex(
                 r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
-            if sources:
-                urls.extend(self._parse_json(sources, video_id))
+            urls.update(traverse_obj(sources, (T(lambda s: self._parse_json(s, video_id)), Ellipsis)))

             formats = []
-            for video_url in urls:
+            for video_url in traverse_obj(urls, (Ellipsis, T(url_or_none))):
                 if determine_ext(video_url) == 'm3u8':
                     formats.extend(self._extract_m3u8_formats(
                         video_url, video_id, 'mp4',
@@ -185,17 +273,19 @@ class XFileShareIE(InfoExtractor):
                     'url': video_url,
                     'format_id': 'sd',
                 })
-        self._sort_formats(formats)
+            result = {'formats': formats}
+
+        self._sort_formats(result['formats'])

         thumbnail = self._search_regex(
-            [
-                r'<video[^>]+poster="([^"]+)"',
-                r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
-            ], webpage, 'thumbnail', default=None)
+            self._THUMBNAIL_REGEXES, webpage, 'thumbnail', default=None)

-        return {
+        if not (title or result.get('title')):
+            title = self._generic_title(url) or video_id
+
+        return merge_dicts(result, {
             'id': video_id,
-            'title': title,
+            'title': title or None,
             'thumbnail': thumbnail,
-            'formats': formats,
-        }
+            'http_headers': {'Referer': url}
+        })
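To see the substitution idea behind aa_decode at a glance: each delimiter-separated segment of the AAencoded payload is reduced to a decimal (or u-prefixed hex) number via the symbol table, then converted to a character. A toy decoder over a two-entry table and a hand-built payload, so only a sketch of the mechanism, not real AAencoded JS:

# -*- coding: utf-8 -*-
symbol_table = (
    ('5', '((゚ー゚) + (゚Θ゚))'),
    ('0', '(c^_^o)'),
    ('+', ''),
)
delim = '(゚Д゚)[゚ε゚]+'

def chr_from_code(c):
    # reduce the segment to digits, then to a character
    for val, pat in symbol_table:
        c = c.replace(pat, val)
    if c.startswith(('u', 'U')):  # u-prefix switches to hexadecimal
        base, c = 16, c[1:]
    else:
        base = 10
    return chr(int(c, base)) if c else ''

aa_code = delim.join(['', '((゚ー゚) + (゚Θ゚))(c^_^o)', 'u((゚ー゚) + (゚Θ゚))(c^_^o)'])
print(''.join(chr_from_code(seg) for seg in aa_code.split(delim)))
# '2P' -- chr(50) and chr(0x50)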
youtube_dl/extractor/youporn.py
@@ -1,20 +1,38 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import itertools
 import re
+from time import sleep

 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
     extract_attributes,
+    ExtractorError,
+    get_element_by_class,
+    get_element_by_id,
     int_or_none,
-    str_to_int,
+    merge_dicts,
+    parse_count,
+    parse_qs,
+    T,
+    traverse_obj,
     unified_strdate,
     url_or_none,
+    urljoin,
 )


 class YouPornIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+    _VALID_URL = (
+        r'youporn:(?P<id>\d+)',
+        r'''(?x)
+            https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)
+            (?:/(?:(?P<display_id>[^/?#&]+)/?)?)?(?:[#?]|$)
+        ''',
+    )
+    _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)']
     _TESTS = [{
         'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
         'md5': '3744d24c50438cf5b6f6d59feb5055c2',
@@ -34,7 +52,7 @@ class YouPornIE(InfoExtractor):
             'tags': list,
             'age_limit': 18,
         },
-        'skip': 'This video has been disabled',
+        'skip': 'This video has been deactivated',
     }, {
         # Unknown uploader
         'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
@@ -66,57 +84,104 @@ class YouPornIE(InfoExtractor):
     }, {
         'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
         'only_matching': True,
+    }, {
+        'url': 'https://www.youporn.com/watch/16290308/tinderspecial-trailer1/',
+        'info_dict': {
+            'id': '16290308',
+            'age_limit': 18,
+            'categories': [],
+            'description': None,  # SEO spam using title removed
+            'display_id': 'tinderspecial-trailer1',
+            'duration': 298.0,
+            'ext': 'mp4',
+            'upload_date': '20201123',
+            'uploader': 'Ersties',
+            'tags': [],
+            'thumbnail': 'https://fi1.ypncdn.com/m=eaSaaTbWx/202011/23/16290308/original/3.jpg',
+            'timestamp': 1606147564,
+            'title': 'Tinder In Real Life',
+            'view_count': int,
+        }
     }]

-    @staticmethod
-    def _extract_urls(webpage):
-        return re.findall(
-            r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)',
-            webpage)
+    @classmethod
+    def _extract_urls(cls, webpage):
+        def yield_urls():
+            for p in cls._EMBED_REGEX:
+                for m in re.finditer(p, webpage):
+                    yield m.group('url')
+
+        return list(yield_urls())

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        display_id = mobj.group('display_id') or video_id
-
-        definitions = self._download_json(
-            'https://www.youporn.com/api/video/media_definitions/%s/' % video_id,
-            display_id)
+        # A different video ID (data-video-id) is hidden in the page but
+        # never seems to be used
+        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
+        url = 'http://www.youporn.com/watch/%s' % (video_id,)
+        webpage = self._download_webpage(
+            url, video_id, headers={'Cookie': 'age_verified=1'})
+
+        watchable = self._search_regex(
+            r'''(<div\s[^>]*\bid\s*=\s*('|")?watch-container(?(2)\2|(?!-)\b)[^>]*>)''',
+            webpage, 'watchability', default=None)
+        if not watchable:
+            msg = re.split(r'\s{4}', clean_html(get_element_by_id(
+                'mainContent', webpage)) or '')[0]
+            raise ExtractorError(
+                ('%s says: %s' % (self.IE_NAME, msg))
+                if msg else 'Video unavailable: no reason found',
+                expected=True)
+        # internal ID ?
+        # video_id = extract_attributes(watchable).get('data-video-id')
+
+        playervars = self._search_json(
+            r'\bplayervars\s*:', webpage, 'playervars', video_id)
+
+        def get_fmt(x):
+            v_url = url_or_none(x.get('videoUrl'))
+            if v_url:
+                x['videoUrl'] = v_url
+                return (x['format'], x)
+
+        defs_by_format = dict(traverse_obj(playervars, (
+            'mediaDefinitions', lambda _, v: v.get('format'), T(get_fmt))))
+
+        def get_format_data(f):
+            if f not in defs_by_format:
+                return []
+            return self._download_json(
+                defs_by_format[f]['videoUrl'], video_id, '{0}-formats'.format(f))

         formats = []
-        for definition in definitions:
-            if not isinstance(definition, dict):
-                continue
-            video_url = url_or_none(definition.get('videoUrl'))
-            if not video_url:
-                continue
-            f = {
-                'url': video_url,
-                'filesize': int_or_none(definition.get('videoSize')),
-            }
-            height = int_or_none(definition.get('quality'))
+        # Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
+        for hls_url in traverse_obj(
+                get_format_data('hls'),
+                (lambda _, v: not isinstance(v['defaultQuality'], bool), 'videoUrl'),
+                (Ellipsis, 'videoUrl')):
+            formats.extend(self._extract_m3u8_formats(
+                hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls',
+                entry_protocol='m3u8_native'))
+
+        for f in traverse_obj(get_format_data('mp4'), (
+                lambda _, v: v.get('videoUrl'), {
+                    'url': ('videoUrl', T(url_or_none)),
+                    'filesize': ('videoSize', T(int_or_none)),
+                    'height': ('quality', T(int_or_none)),
+                }, T(lambda x: x.get('videoUrl') and x))):
             # Video URL's path looks like this:
             #  /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
             #  /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
             #  /videos/201703/11/109285532/1080P_4000K_109285532.mp4
             # We will benefit from it by extracting some metadata
-            mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
+            mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', f['videoUrl'])
             if mobj:
-                if not height:
-                    height = int(mobj.group('height'))
-                bitrate = int(mobj.group('bitrate'))
-                f.update({
-                    'format_id': '%dp-%dk' % (height, bitrate),
-                    'tbr': bitrate,
-                })
-            f['height'] = height
+                if not f.get('height'):
+                    f['height'] = int(mobj.group('height'))
+                f['tbr'] = int(mobj.group('bitrate'))
+                f['format_id'] = '%dp-%dk' % (f['height'], f['tbr'])
             formats.append(f)
         self._sort_formats(formats)

-        webpage = self._download_webpage(
-            'http://www.youporn.com/watch/%s' % video_id, display_id,
-            headers={'Cookie': 'age_verified=1'})
-
         title = self._html_search_regex(
             r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
             webpage, 'title', default=None) or self._og_search_title(
@@ -131,8 +196,10 @@ class YouPornIE(InfoExtractor):
         thumbnail = self._search_regex(
             r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
             webpage, 'thumbnail', fatal=False, group='thumbnail')
-        duration = int_or_none(self._html_search_meta(
-            'video:duration', webpage, 'duration', fatal=False))
+        duration = traverse_obj(playervars, ('duration', T(int_or_none)))
+        if duration is None:
+            duration = int_or_none(self._html_search_meta(
+                'video:duration', webpage, 'duration', fatal=False))

         uploader = self._html_search_regex(
             r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
@@ -148,11 +215,11 @@ class YouPornIE(InfoExtractor):

         view_count = None
         views = self._search_regex(
-            r'(<div[^>]+\bclass=["\']js_videoInfoViews["\']>)', webpage,
-            'views', default=None)
+            r'(<div\s[^>]*\bdata-value\s*=[^>]+>)\s*<label>Views:</label>',
+            webpage, 'views', default=None)
         if views:
-            view_count = str_to_int(extract_attributes(views).get('data-value'))
-        comment_count = str_to_int(self._search_regex(
+            view_count = parse_count(extract_attributes(views).get('data-value'))
+        comment_count = parse_count(self._search_regex(
             r'>All [Cc]omments? \(([\d,.]+)\)',
             webpage, 'comment count', default=None))

@@ -168,7 +235,10 @@ class YouPornIE(InfoExtractor):
             r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>',
             'tags')

-        return {
+        data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False) or {}
+        data.pop('url', None)
+
+        result = merge_dicts(data, {
             'id': video_id,
             'display_id': display_id,
             'title': title,
@@ -183,4 +253,442 @@ class YouPornIE(InfoExtractor):
             'tags': tags,
             'age_limit': age_limit,
             'formats': formats,
-        }
+        })
+        # Remove promotional non-description
+        if result.get('description', '').startswith(
+                'Watch %s online' % (result['title'],)):
+            del result['description']
+        return result
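The reworked YouPornIE flow above is a two-step lookup: playervars.mediaDefinitions maps each format name ('hls', 'mp4') to a metadata URL, and fetching that URL returns the concrete format entries. A sketch of the same flow with made-up JSON and a stub in place of _download_json:

playervars = {'mediaDefinitions': [
    {'format': 'hls', 'videoUrl': 'https://example.com/api/hls'},
    {'format': 'mp4', 'videoUrl': 'https://example.com/api/mp4'},
]}

defs_by_format = {
    d['format']: d for d in playervars['mediaDefinitions'] if d.get('videoUrl')}

def get_format_data(f, fetch):
    # fetch() stands in for self._download_json() in the extractor
    return fetch(defs_by_format[f]['videoUrl']) if f in defs_by_format else []

fake_api = {'https://example.com/api/mp4': [
    {'videoUrl': 'https://cdn.example.com/720P_1500K_1.mp4', 'quality': 720}]}
print(get_format_data('mp4', fake_api.get))
# [{'videoUrl': 'https://cdn.example.com/720P_1500K_1.mp4', 'quality': 720}]

The remainder of the same hunk adds the playlist base class and category extractor: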
+
+
+class YouPornListBase(InfoExtractor):
+    # pattern in '.title-text' element of page section containing videos
+    _PLAYLIST_TITLEBAR_RE = r'\s+[Vv]ideos\s*$'
+    _PAGE_RETRY_COUNT = 0  # i.e. no retry
+    _PAGE_RETRY_DELAY = 2  # seconds
+
+    def _get_next_url(self, url, pl_id, html):
+        return urljoin(url, self._search_regex(
+            r'''<a\s[^>]*?\bhref\s*=\s*("|')(?P<url>(?:(?!\1)[^>])+)\1''',
+            get_element_by_id('next', html) or '', 'next page',
+            group='url', default=None))
+
+    @classmethod
+    def _get_title_from_slug(cls, title_slug):
+        return re.sub(r'[_-]', ' ', title_slug)
+
+    def _entries(self, url, pl_id, html=None, page_num=None):
+
+        # separates page sections
+        PLAYLIST_SECTION_RE = (
+            r'''<div\s[^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?title-bar(?:\s+[\w$-]+|\s)*\1[^>]*>'''
+        )
+        # contains video link
+        VIDEO_URL_RE = r'''(?x)
+            <div\s[^>]*\bdata-video-id\s*=\s*('|")\d+\1[^>]*>\s*
+            (?:<div\b[\s\S]+?</div>\s*)*
+            <a\s[^>]*\bhref\s*=\s*('|")(?P<url>(?:(?!\2)[^>])+)\2
+        '''
+
+        def yield_pages(url, html=html, page_num=page_num):
+            fatal = not html
+            for pnum in itertools.count(start=page_num or 1):
+                if not html:
+                    html = self._download_webpage(
+                        url, pl_id, note='Downloading page %d' % pnum,
+                        fatal=fatal)
+                if not html:
+                    break
+                fatal = False
+                yield (url, html, pnum)
+                # explicit page: extract just that page
+                if page_num is not None:
+                    break
+                next_url = self._get_next_url(url, pl_id, html)
+                if not next_url or next_url == url:
+                    break
+                url, html = next_url, None
+
+        def retry_page(msg, tries_left, page_data):
+            if tries_left <= 0:
+                return
+            self.report_warning(msg, pl_id)
+            sleep(self._PAGE_RETRY_DELAY)
+            return next(
+                yield_pages(page_data[0], page_num=page_data[2]), None)
+
+        def yield_entries(html):
+            for frag in re.split(PLAYLIST_SECTION_RE, html):
+                if not frag:
+                    continue
+                t_text = get_element_by_class('title-text', frag or '')
+                if not (t_text and re.search(self._PLAYLIST_TITLEBAR_RE, t_text)):
+                    continue
+                for m in re.finditer(VIDEO_URL_RE, frag):
+                    video_url = urljoin(url, m.group('url'))
+                    if video_url:
+                        yield self.url_result(video_url)
+
+        last_first_url = None
+        for page_data in yield_pages(url, html=html, page_num=page_num):
+            # page_data: url, html, page_num
+            first_url = None
+            tries_left = self._PAGE_RETRY_COUNT + 1
+            while tries_left > 0:
+                tries_left -= 1
+                for from_ in yield_entries(page_data[1]):
+                    # may get the same page twice instead of empty page
+                    # or (site bug) instead of actual next page
+                    if not first_url:
+                        first_url = from_['url']
+                        if first_url == last_first_url:
+                            # sometimes (/porntags/) the site serves the previous page
+                            # instead but may provide the correct page after a delay
+                            page_data = retry_page(
+                                'Retrying duplicate page...', tries_left, page_data)
+                            if page_data:
+                                first_url = None
+                                break
+                            continue
+                    yield from_
+                else:
+                    if not first_url and 'no-result-paragarph1' in page_data[1]:
+                        page_data = retry_page(
+                            'Retrying empty page...', tries_left, page_data)
+                        if page_data:
+                            continue
+                    else:
+                        # success/failure
+                        break
+            # may get an infinite (?) sequence of empty pages
+            if not first_url:
+                break
+            last_first_url = first_url
+
+    def _real_extract(self, url, html=None):
+        # exceptionally, id may be None
+        m_dict = self._match_valid_url(url).groupdict()
+        pl_id, page_type, sort = (m_dict.get(k) for k in ('id', 'type', 'sort'))
+
+        qs = parse_qs(url)
+        for q, v in qs.items():
+            if v:
+                qs[q] = v[-1]
+            else:
+                del qs[q]
+
+        base_id = pl_id or 'YouPorn'
+        title = self._get_title_from_slug(base_id)
+        if page_type:
+            title = '%s %s' % (page_type.capitalize(), title)
+        base_id = [base_id.lower()]
+        if sort is None:
+            title += ' videos'
+        else:
+            title = '%s videos by %s' % (title, re.sub(r'[_-]', ' ', sort))
+            base_id.append(sort)
+        if qs:
+            ps = ['%s=%s' % item for item in sorted(qs.items())]
+            title += ' (%s)' % ','.join(ps)
+            base_id.extend(ps)
+        pl_id = '/'.join(base_id)
+
+        return self.playlist_result(
+            self._entries(url, pl_id, html=html,
+                          page_num=int_or_none(qs.get('page'))),
+            playlist_id=pl_id, playlist_title=title)
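The _entries() generator above interleaves three concerns: following 'next' links, filtering page sections by their title bar, and retrying when the site serves a duplicate or empty page instead of the real next one. Stripped of the extractor plumbing, the retry-around-pagination pattern looks roughly like this (a standalone sketch; fetch_page, parse_entries and next_url are hypothetical callables, not extractor APIs):

import itertools
import time

PAGE_RETRY_COUNT = 1  # extra attempts per suspect page
PAGE_RETRY_DELAY = 2  # seconds

def entries(start_url, fetch_page, parse_entries, next_url):
    url, last_first = start_url, None
    for page_num in itertools.count(1):
        for _ in range(PAGE_RETRY_COUNT + 1):
            html = fetch_page(url, page_num)
            items = parse_entries(html)
            # a repeated first item means the previous page was re-served;
            # the correct page may only be available after a short delay
            if items and items[0] != last_first:
                break
            time.sleep(PAGE_RETRY_DELAY)
        else:
            return  # still empty or duplicated after retrying
        last_first = items[0]
        for item in items:
            yield item
        new_url = next_url(html)
        if not new_url or new_url == url:
            return
        url = new_url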
+
+
+class YouPornCategoryIE(YouPornListBase):
+    IE_DESC = 'YouPorn category, with sorting, filtering and pagination'
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?youporn\.com/
+        (?P<type>category)/(?P<id>[^/?#&]+)
+        (?:/(?P<sort>popular|views|rating|time|duration))?/?(?:[#?]|$)
+    '''
+    _TESTS = [{
+        'note': 'Full list with pagination',
+        'url': 'https://www.youporn.com/category/lingerie/popular/',
+        'info_dict': {
+            'id': 'lingerie/popular',
+            'title': 'Category lingerie videos by popular',
+        },
+        'playlist_mincount': 39,
+    }, {
+        'note': 'Filtered paginated list with single page result',
+        'url': 'https://www.youporn.com/category/lingerie/duration/?min_minutes=10',
+        'info_dict': {
+            'id': 'lingerie/duration/min_minutes=10',
+            'title': 'Category lingerie videos by duration (min_minutes=10)',
+        },
+        'playlist_maxcount': 30,
+    }, {
+        'note': 'Single page of full list',
+        'url': 'https://www.youporn.com/category/lingerie/popular?page=1',
+        'info_dict': {
+            'id': 'lingerie/popular/page=1',
+            'title': 'Category lingerie videos by popular (page=1)',
+        },
+        'playlist_count': 30,
+    }]
+
+
+class YouPornChannelIE(YouPornListBase):
+    IE_DESC = 'YouPorn channel, with sorting and pagination'
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?youporn\.com/
+        (?P<type>channel)/(?P<id>[^/?#&]+)
+        (?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$)
+    '''
+    _TESTS = [{
+        'note': 'Full list with pagination',
+        'url': 'https://www.youporn.com/channel/x-feeds/',
+        'info_dict': {
+            'id': 'x-feeds',
+            'title': 'Channel X-Feeds videos',
+        },
+        'playlist_mincount': 37,
+    }, {
+        'note': 'Single page of full list (no filters here)',
+        'url': 'https://www.youporn.com/channel/x-feeds/duration?page=1',
+        'info_dict': {
+            'id': 'x-feeds/duration/page=1',
+            'title': 'Channel X-Feeds videos by duration (page=1)',
+        },
+        'playlist_count': 24,
+    }]
+
+    @staticmethod
+    def _get_title_from_slug(title_slug):
+        return re.sub(r'_', ' ', title_slug).title()
+
+
+class YouPornCollectionIE(YouPornListBase):
+    IE_DESC = 'YouPorn collection (user playlist), with sorting and pagination'
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?youporn\.com/
+        (?P<type>collection)s/videos/(?P<id>\d+)
+        (?:/(?P<sort>rating|views|time|duration))?/?(?:[#?]|$)
+    '''
+    _PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+in\s'
+    _TESTS = [{
+        'note': 'Full list with pagination',
+        'url': 'https://www.youporn.com/collections/videos/33044251/',
+        'info_dict': {
+            'id': '33044251',
+            'title': 'Collection Sexy Lips videos',
+            'uploader': 'ph-littlewillyb',
+        },
+        'playlist_mincount': 50,
+    }, {
+        'note': 'Single page of full list (no filters here)',
+        'url': 'https://www.youporn.com/collections/videos/33044251/time?page=1',
+        'info_dict': {
+            'id': '33044251/time/page=1',
+            'title': 'Collection Sexy Lips videos by time (page=1)',
+            'uploader': 'ph-littlewillyb',
+        },
+        'playlist_count': 20,
+    }]
+
+    def _real_extract(self, url):
+        pl_id = self._match_id(url)
+        html = self._download_webpage(url, pl_id)
+        playlist = super(YouPornCollectionIE, self)._real_extract(url, html=html)
+        infos = re.sub(r'\s+', ' ', clean_html(get_element_by_class(
+            'collection-infos', html)) or '')
+        title, uploader = self._search_regex(
+            r'^\s*Collection: (?P<title>.+?) \d+ VIDEOS \d+ VIEWS \d+ days LAST UPDATED From: (?P<uploader>[\w_-]+)',
+            infos, 'title/uploader', group=('title', 'uploader'), default=(None, None))
+
+        return merge_dicts({
+            'title': playlist['title'].replace(playlist['id'].split('/')[0], title),
+            'uploader': uploader,
+        }, playlist) if title else playlist
+
+
+class YouPornTagIE(YouPornListBase):
+    IE_DESC = 'YouPorn tag (porntags), with sorting, filtering and pagination'
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?youporn\.com/
+        porn(?P<type>tag)s/(?P<id>[^/?#&]+)
+        (?:/(?P<sort>views|rating|time|duration))?/?(?:[#?]|$)
+    '''
+    _PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+tagged\s'
+    _PAGE_RETRY_COUNT = 1
+    _TESTS = [{
+        'note': 'Full list with pagination',
+        'url': 'https://www.youporn.com/porntags/austrian',
+        'info_dict': {
+            'id': 'austrian',
+            'title': 'Tag austrian videos',
+        },
+        'playlist_mincount': 35,
+        'expected_warnings': ['Retrying duplicate page'],
+    }, {
+        'note': 'Filtered paginated list with single page result',
+        'url': 'https://www.youporn.com/porntags/austrian/duration/?min_minutes=10',
+        'info_dict': {
+            'id': 'austrian/duration/min_minutes=10',
+            'title': 'Tag austrian videos by duration (min_minutes=10)',
+        },
+        # number of videos per page is (row x col) 2x3 + 6x4 + 2, or + 3,
+        # or more, varying with number of ads; let's set max as 9x4
+        # NB col 1 may not be shown in non-JS page with site CSS and zoom 100%
+        'playlist_maxcount': 32,
+        'expected_warnings': ['Retrying duplicate page', 'Retrying empty page'],
+    }, {
+        'note': 'Single page of full list',
+        'url': 'https://www.youporn.com/porntags/austrian/?page=1',
+        'info_dict': {
+            'id': 'austrian/page=1',
+            'title': 'Tag austrian videos (page=1)',
+        },
+        'playlist_mincount': 32,
+        'playlist_maxcount': 34,
+        'expected_warnings': ['Retrying duplicate page', 'Retrying empty page'],
+    }]
+
+    # YP tag navigation is broken, loses sort
+    def _get_next_url(self, url, pl_id, html):
+        next_url = super(YouPornTagIE, self)._get_next_url(url, pl_id, html)
+        if next_url:
+            n = self._match_valid_url(next_url)
+            if n:
+                s = n.groupdict().get('sort')
+                if s:
+                    u = self._match_valid_url(url)
+                    if u:
+                        u = u.groupdict().get('sort')
+                        if s and not u:
+                            n = n.end('sort')
+                            next_url = next_url[:n] + '/' + u + next_url[n:]
+        return next_url
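Per its comment, the override above compensates for the site's 'next' link mangling the sort segment, splicing a segment back in at the offset the match object reports for a named group. The named-group offset mechanics in isolation (illustrative URL and segment, plain re, no extractor helpers):

import re

m = re.match(r'https://example\.com/porntags/(?P<id>[^/?#]+)',
             'https://example.com/porntags/austrian?page=2')
pos = m.end('id')  # index just past the text the 'id' group matched
fixed = m.string[:pos] + '/duration' + m.string[pos:]
# fixed == 'https://example.com/porntags/austrian/duration?page=2'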
+
+
+class YouPornStarIE(YouPornListBase):
+    IE_DESC = 'YouPorn Pornstar, with description, sorting and pagination'
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?youporn\.com/
+        (?P<type>pornstar)/(?P<id>[^/?#&]+)
+        (?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$)
+    '''
+    _PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+[fF]eaturing\s'
+    _TESTS = [{
+        'note': 'Full list with pagination',
+        'url': 'https://www.youporn.com/pornstar/daynia/',
+        'info_dict': {
+            'id': 'daynia',
+            'title': 'Pornstar Daynia videos',
+            'description': r're:Daynia Rank \d+ Videos \d+ Views [\d,.]+ .+ Subscribers \d+',
+        },
+        'playlist_mincount': 45,
+    }, {
+        'note': 'Single page of full list (no filters here)',
+        'url': 'https://www.youporn.com/pornstar/daynia/?page=1',
+        'info_dict': {
+            'id': 'daynia/page=1',
+            'title': 'Pornstar Daynia videos (page=1)',
+            'description': 're:.{180,}',
+        },
+        'playlist_count': 26,
+    }]
+
+    @staticmethod
+    def _get_title_from_slug(title_slug):
+        return re.sub(r'_', ' ', title_slug).title()
+
+    def _real_extract(self, url):
+        pl_id = self._match_id(url)
+        html = self._download_webpage(url, pl_id)
+        playlist = super(YouPornStarIE, self)._real_extract(url, html=html)
+        INFO_ELEMENT_RE = r'''(?x)
+            <div\s[^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?pornstar-info-wrapper(?:\s+[\w$-]+|\s)*\1[^>]*>
+            (?P<info>[\s\S]+?)(?:</div>\s*){6,}
+        '''
+
+        infos = self._search_regex(INFO_ELEMENT_RE, html, 'infos', group='info', default='')
+        if infos:
+            infos = re.sub(
+                r'(?:\s*nl=nl)+\s*', ' ',
+                re.sub(r'(?u)\s+', ' ', clean_html(
+                    re.sub('\n', 'nl=nl', infos)))).replace('ribe Subsc', '')
+
+        return merge_dicts({
+            'description': infos.strip() or None,
+        }, playlist)
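The infos post-processing relies on a sentinel trick: newlines are rewritten to the token nl=nl so that clean_html() and the (?u)\s+ collapse cannot eat them, and runs of the token then become single separators; the trailing .replace('ribe Subsc', '') apparently deduplicates a doubled 'Subscribe' button label. The same transformation restated as a standalone helper (for readability only, using youtube_dl.utils.clean_html):

import re
from youtube_dl.utils import clean_html

def flatten_infos(infos):
    protected = re.sub('\n', 'nl=nl', infos)            # shield line breaks
    collapsed = re.sub(r'(?u)\s+', ' ', clean_html(protected))
    return re.sub(r'(?:\s*nl=nl)+\s*', ' ', collapsed)  # one space per break run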
+
+
+class YouPornVideosIE(YouPornListBase):
+    IE_DESC = 'YouPorn video (browse) playlists, with sorting, filtering and pagination'
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?youporn\.com/
+        (?:(?P<id>browse)/)?
+        (?P<sort>(?(id)
+            (?:duration|rating|time|views)|
+            (?:most_(?:favou?rit|view)ed|recommended|top_rated)?))
+        (?:[/#?]|$)
+    '''
+    _PLAYLIST_TITLEBAR_RE = r'\s+(?:[Vv]ideos|VIDEOS)\s*$'
+    _TESTS = [{
+        'note': 'Full list with pagination (too long for test)',
+        'url': 'https://www.youporn.com/',
+        'info_dict': {
+            'id': 'youporn',
+            'title': 'YouPorn videos',
+        },
+        'only_matching': True,
+    }, {
+        'note': 'Full list with pagination (too long for test)',
+        'url': 'https://www.youporn.com/recommended',
+        'info_dict': {
+            'id': 'youporn/recommended',
+            'title': 'YouPorn videos by recommended',
+        },
+        'only_matching': True,
+    }, {
+        'note': 'Full list with pagination (too long for test)',
+        'url': 'https://www.youporn.com/top_rated',
+        'info_dict': {
+            'id': 'youporn/top_rated',
+            'title': 'YouPorn videos by top rated',
+        },
+        'only_matching': True,
+    }, {
+        'note': 'Full list with pagination (too long for test)',
+        'url': 'https://www.youporn.com/browse/time',
+        'info_dict': {
+            'id': 'browse/time',
+            'title': 'YouPorn videos by time',
+        },
+        'only_matching': True,
+    }, {
+        'note': 'Filtered paginated list with single page result',
+        'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=2',
+        'info_dict': {
+            'id': 'youporn/most_favorited/max_minutes=2/res=VR',
+            'title': 'YouPorn videos by most favorited (max_minutes=2,res=VR)',
+        },
+        'playlist_mincount': 10,
+        'playlist_maxcount': 28,
+    }, {
+        'note': 'Filtered paginated list with several pages',
+        'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=5',
+        'info_dict': {
+            'id': 'youporn/most_favorited/max_minutes=5/res=VR',
+            'title': 'YouPorn videos by most favorited (max_minutes=5,res=VR)',
+        },
+        'playlist_mincount': 45,
+    }, {
+        'note': 'Single page of full list',
+        'url': 'https://www.youporn.com/browse/time?page=1',
+        'info_dict': {
+            'id': 'browse/time/page=1',
+            'title': 'YouPorn videos by time (page=1)',
+        },
+        'playlist_count': 36,
+    }]
+
+    @staticmethod
+    def _get_title_from_slug(title_slug):
+        return 'YouPorn' if title_slug == 'browse' else title_slug
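The _VALID_URL above uses a regex conditional: (?(id)A|B) applies alternative A only when the named group id took part in the match, so /browse/ URLs demand an explicit sort key while bare site URLs accept the named lists or nothing. A self-contained illustration of the construct:

import re

pattern = re.compile(r'''(?x)
    (?:(?P<id>browse)/)?
    (?P<sort>(?(id)
        (?:duration|rating|time|views)|
        (?:most_(?:favou?rit|view)ed|recommended|top_rated)?))
    $''')

print(pattern.match('browse/time').group('sort'))  # 'time'
print(pattern.match('recommended').group('sort'))  # 'recommended'
print(pattern.match('browse/recommended'))         # None: invalid after browse/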
@@ -1647,10 +1647,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         except JSInterpreter.Exception as e:
             self.report_warning(
                 '%s (%s %s)' % (
-                    'Unable to decode n-parameter: download likely to be throttled',
+                    self.__ie_msg(
+                        'Unable to decode n-parameter: download likely to be throttled'),
                     error_to_compat_str(e),
-                    traceback.format_exc()))
+                    traceback.format_exc()),
+                video_id=video_id)
             return

         self.write_debug('Decrypted nsig {0} => {1}'.format(n, ret))
@@ -186,11 +186,11 @@ def parseOpts(overrideArguments=None):
     general.add_option(
         '--mark-watched',
         action='store_true', dest='mark_watched', default=False,
-        help='Mark videos watched (YouTube only)')
+        help='Mark videos watched (if supported for site)')
     general.add_option(
         '--no-mark-watched',
         action='store_false', dest='mark_watched', default=False,
-        help='Do not mark videos watched (YouTube only)')
+        help='Do not mark videos watched')
     general.add_option(
         '--no-color', '--no-colors',
        action='store_true', dest='no_color',
@@ -74,8 +74,11 @@ class FFmpegPostProcessor(PostProcessor):
         return FFmpegPostProcessor(downloader)._versions

     def _determine_executables(self):
-        programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
+        # ordered to match prefer_ffmpeg!
+        convs = ['ffmpeg', 'avconv']
+        probes = ['ffprobe', 'avprobe']
         prefer_ffmpeg = True
+        programs = convs + probes

         def get_ffmpeg_version(path):
             ver = get_exe_version(path, args=['-version'])
@@ -96,6 +99,7 @@ class FFmpegPostProcessor(PostProcessor):
         self._paths = None
         self._versions = None
+        location = None
         if self._downloader:
             prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True)
             location = self._downloader.params.get('ffmpeg_location')
@@ -118,33 +122,21 @@ class FFmpegPostProcessor(PostProcessor):
                 location = os.path.dirname(os.path.abspath(location))
                 if basename in ('ffmpeg', 'ffprobe'):
                     prefer_ffmpeg = True

-                self._paths = dict(
-                    (p, os.path.join(location, p)) for p in programs)
-                self._versions = dict(
-                    (p, get_ffmpeg_version(self._paths[p])) for p in programs)
-        if self._versions is None:
-            self._versions = dict(
-                (p, get_ffmpeg_version(p)) for p in programs)
-            self._paths = dict((p, p) for p in programs)
-
-        if prefer_ffmpeg is False:
-            prefs = ('avconv', 'ffmpeg')
-        else:
-            prefs = ('ffmpeg', 'avconv')
-        for p in prefs:
-            if self._versions[p]:
-                self.basename = p
-                break
-
-        if prefer_ffmpeg is False:
-            prefs = ('avprobe', 'ffprobe')
-        else:
-            prefs = ('ffprobe', 'avprobe')
-        for p in prefs:
-            if self._versions[p]:
-                self.probe_basename = p
-                break
+        self._paths = dict(
+            (p, p if location is None else os.path.join(location, p))
+            for p in programs)
+        self._versions = dict(
+            x for x in (
+                (p, get_ffmpeg_version(self._paths[p])) for p in programs)
+            if x[1] is not None)
+
+        basenames = [None, None]
+        for i, progs in enumerate((convs, probes)):
+            for p in progs[::-1 if prefer_ffmpeg is False else 1]:
+                if self._versions.get(p):
+                    basenames[i] = p
+                    break
+        self.basename, self.probe_basename = basenames

     @property
     def available(self):
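The rewrite collapses two copy-pasted preference blocks into one loop: convs and probes are each scanned in ffmpeg-first order, reversed when prefer_ffmpeg is False, and the first entry with a detected version wins. The ordering trick in isolation (illustrative versions dict standing in for the probe results):

convs = ['ffmpeg', 'avconv']
probes = ['ffprobe', 'avprobe']
versions = {'avconv': '12.3', 'ffprobe': '4.1'}  # pretend detection results

def pick(progs, prefer_ffmpeg):
    # [::-1] flips the ffmpeg-first ordering when avconv is preferred
    for p in progs[::-1 if prefer_ffmpeg is False else 1]:
        if versions.get(p):
            return p

print(pick(convs, True))    # 'avconv': ffmpeg missing, falls back
print(pick(probes, False))  # 'ffprobe': avprobe missing, falls back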
@@ -45,6 +45,7 @@ from .compat import (
     compat_casefold,
     compat_chr,
     compat_collections_abc,
+    compat_contextlib_suppress,
     compat_cookiejar,
     compat_ctypes_WINFUNCTYPE,
     compat_datetime_timedelta_total_seconds,
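compat_contextlib_suppress mirrors contextlib.suppress, which only appeared in Python 3.4; youtube-dl still runs on 2.6+, so the compat layer has to provide it. A minimal sketch of such a shim (the actual definition lives in youtube_dl/compat.py and may differ in detail):

import contextlib

try:
    compat_contextlib_suppress = contextlib.suppress  # Python 3.4+
except AttributeError:
    @contextlib.contextmanager
    def compat_contextlib_suppress(*exceptions):
        # swallow only the listed exception types, like contextlib.suppress
        try:
            yield
        except exceptions:
            pass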
@@ -1855,25 +1856,18 @@ def write_json_file(obj, fn):
     try:
         with tf:
             json.dump(obj, tf)
-        if sys.platform == 'win32':
-            # Need to remove existing file on Windows, else os.rename raises
-            # WindowsError or FileExistsError.
-            try:
+        with compat_contextlib_suppress(OSError):
+            if sys.platform == 'win32':
+                # Need to remove existing file on Windows, else os.rename raises
+                # WindowsError or FileExistsError.
                 os.unlink(fn)
-            except OSError:
-                pass
-        try:
             mask = os.umask(0)
             os.umask(mask)
             os.chmod(tf.name, 0o666 & ~mask)
-        except OSError:
-            pass
         os.rename(tf.name, fn)
     except Exception:
-        try:
+        with compat_contextlib_suppress(OSError):
             os.remove(tf.name)
-        except OSError:
-            pass
         raise
@@ -2033,14 +2027,13 @@ def extract_attributes(html_element):
     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
     """
-    parser = HTMLAttributeParser()
-    try:
-        parser.feed(html_element)
-        parser.close()
-    # Older Python may throw HTMLParseError in case of malformed HTML
-    except compat_HTMLParseError:
-        pass
-    return parser.attrs
+    ret = None
+    # Older Python may throw HTMLParseError in case of malformed HTML (and on .close()!)
+    with compat_contextlib_suppress(compat_HTMLParseError):
+        with contextlib.closing(HTMLAttributeParser()) as parser:
+            parser.feed(html_element)
+            ret = parser.attrs
+    return ret or {}


 def clean_html(html):
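Behaviour is unchanged for well-formed input; the rewrite just also survives an HTMLParseError raised from close() on older interpreters and falls back to an empty dict. Illustrative usage, assuming the usual attribute-dict contract of extract_attributes (values here are made up):

extract_attributes('<a href="/video/123" class="video-box" data-id=123>')
# -> {'href': '/video/123', 'class': 'video-box', 'data-id': '123'}
extract_attributes('<em oops')  # malformed: now reliably {} rather than an exception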
@@ -2182,8 +2175,28 @@ def sanitize_url(url):
     return url


+def extract_basic_auth(url):
+    parts = compat_urllib_parse.urlsplit(url)
+    if parts.username is None:
+        return url, None
+    url = compat_urllib_parse.urlunsplit(parts._replace(netloc=(
+        parts.hostname if parts.port is None
+        else '%s:%d' % (parts.hostname, parts.port))))
+    auth_payload = base64.b64encode(
+        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
+    return url, 'Basic {0}'.format(auth_payload.decode('ascii'))
+
+
 def sanitized_Request(url, *args, **kwargs):
-    return compat_urllib_request.Request(escape_url(sanitize_url(url)), *args, **kwargs)
+    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
+    if auth_header is not None:
+        headers = args[1] if len(args) > 1 else kwargs.get('headers')
+        headers = headers or {}
+        headers['Authorization'] = auth_header
+        if len(args) <= 1 and kwargs.get('headers') is None:
+            kwargs['headers'] = headers
+            kwargs = compat_kwargs(kwargs)
+    return compat_urllib_request.Request(url, *args, **kwargs)


 def expand_path(s):
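extract_basic_auth() moves credentials embedded in the URL's userinfo into an explicit Authorization header, which urllib's Request machinery does not do by itself. For example ('dXNlcjpwYXNz' is the base64 of 'user:pass'):

extract_basic_auth('http://user:pass@example.com/feed')
# -> ('http://example.com/feed', 'Basic dXNlcjpwYXNz')
extract_basic_auth('http://example.com/feed')
# -> ('http://example.com/feed', None)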
@@ -2221,7 +2234,8 @@ def _htmlentity_transform(entity_with_semicolon):
             numstr = '0%s' % numstr
         else:
             base = 10
         # See https://github.com/ytdl-org/youtube-dl/issues/7518
+        # Also, weirdly, compat_contextlib_suppress fails here in 2.6
         try:
             return compat_chr(int(numstr, base))
         except ValueError:
@@ -2328,11 +2342,9 @@ def make_HTTPS_handler(params, **kwargs):
     # Some servers may (wrongly) reject requests if ALPN extension is not sent. See:
     # https://github.com/python/cpython/issues/85140
     # https://github.com/yt-dlp/yt-dlp/issues/3878
-    try:
+    with compat_contextlib_suppress(AttributeError, NotImplementedError):
+        # fails for Python < 2.7.10, not ssl.HAS_ALPN
         ctx.set_alpn_protocols(ALPN_PROTOCOLS)
-    except (AttributeError, NotImplementedError):
-        # Python < 2.7.10, not ssl.HAS_ALPN
-        pass

     opts_no_check_certificate = params.get('nocheckcertificate', False)
     if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
|
@ -2342,12 +2354,10 @@ def make_HTTPS_handler(params, **kwargs):
|
||||||
context.check_hostname = False
|
context.check_hostname = False
|
||||||
context.verify_mode = ssl.CERT_NONE
|
context.verify_mode = ssl.CERT_NONE
|
||||||
|
|
||||||
try:
|
with compat_contextlib_suppress(TypeError):
|
||||||
|
# Fails with Python 2.7.8 (create_default_context present
|
||||||
|
# but HTTPSHandler has no context=)
|
||||||
return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
|
return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
|
||||||
except TypeError:
|
|
||||||
# Python 2.7.8
|
|
||||||
# (create_default_context present but HTTPSHandler has no context=)
|
|
||||||
pass
|
|
||||||
|
|
||||||
if sys.version_info < (3, 2):
|
if sys.version_info < (3, 2):
|
||||||
return YoutubeDLHTTPSHandler(params, **kwargs)
|
return YoutubeDLHTTPSHandler(params, **kwargs)
|
||||||
|
@@ -2361,15 +2371,24 @@ def make_HTTPS_handler(params, **kwargs):
         return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


-def bug_reports_message():
+def bug_reports_message(before=';'):
     if ytdl_is_updateable():
         update_cmd = 'type youtube-dl -U to update'
     else:
-        update_cmd = 'see https://yt-dl.org/update on how to update'
-    msg = '; please report this issue on https://yt-dl.org/bug .'
-    msg += ' Make sure you are using the latest version; %s.' % update_cmd
-    msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
-    return msg
+        update_cmd = 'see https://github.com/ytdl-org/youtube-dl/#user-content-installation on how to update'
+
+    msg = (
+        'please report this issue on https://github.com/ytdl-org/youtube-dl/issues ,'
+        ' using the appropriate issue template.'
+        ' Make sure you are using the latest version; %s.'
+        ' Be sure to call youtube-dl with the --verbose option and include the complete output.'
+    ) % update_cmd
+
+    before = (before or '').rstrip()
+    if not before or before.endswith(('.', '!', '?')):
+        msg = msg[0].title() + msg[1:]
+
+    return (before + ' ' if before else '') + msg


 class YoutubeDLError(Exception):
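The new before parameter lets callers splice the boilerplate onto existing text: the default ';' keeps the old mid-sentence style, while an empty or sentence-ending before switches the message to start capitalized. From the logic above:

bug_reports_message()
# '; please report this issue on https://github.com/ytdl-org/youtube-dl/issues , ...'
bug_reports_message(before='Fatal error.')
# 'Fatal error. Please report this issue on ...'
bug_reports_message(before='')
# 'Please report this issue on ...'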
@@ -3156,12 +3175,10 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
     if timezone is None:
         timezone, date_str = extract_timezone(date_str)

-    try:
+    with compat_contextlib_suppress(ValueError):
         date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
         dt = datetime.datetime.strptime(date_str, date_format) - timezone
         return calendar.timegm(dt.timetuple())
-    except ValueError:
-        pass


 def date_formats(day_first=True):
@@ -3181,17 +3198,13 @@ def unified_strdate(date_str, day_first=True):
     _, date_str = extract_timezone(date_str)

     for expression in date_formats(day_first):
-        try:
+        with compat_contextlib_suppress(ValueError):
             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
-        except ValueError:
-            pass
     if upload_date is None:
         timetuple = email.utils.parsedate_tz(date_str)
         if timetuple:
-            try:
+            with compat_contextlib_suppress(ValueError):
                 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
-            except ValueError:
-                pass
     if upload_date is not None:
         return compat_str(upload_date)
@@ -3220,11 +3233,9 @@ def unified_timestamp(date_str, day_first=True):
         date_str = m.group(1)

     for expression in date_formats(day_first):
-        try:
+        with compat_contextlib_suppress(ValueError):
             dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
             return calendar.timegm(dt.timetuple())
-        except ValueError:
-            pass
     timetuple = email.utils.parsedate_tz(date_str)
     if timetuple:
         return calendar.timegm(timetuple) + pm_delta * 3600 - compat_datetime_timedelta_total_seconds(timezone)
@@ -3832,14 +3843,15 @@ class PUTRequest(compat_urllib_request.Request):
         return 'PUT'


-def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
+def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1, base=None):
     if get_attr:
         if v is not None:
             v = getattr(v, get_attr, None)
     if v in (None, ''):
         return default
     try:
-        return int(v) * invscale // scale
+        # like int, raise if base is specified and v is not a string
+        return (int(v) if base is None else int(v, base=base)) * invscale // scale
     except (ValueError, TypeError, OverflowError):
         return default
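The new base parameter extends int_or_none() to radix parsing while keeping its never-raise contract: as the comment notes, int() rejects a base combined with a non-string input, and that TypeError is caught and mapped to default like any other conversion failure. For instance:

int_or_none('ff', base=16)   # 255
int_or_none('fg', base=16)   # None: not valid hexadecimal
int_or_none(255, base=16)    # None: int() raises TypeError for non-string + base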