Compare commits

...

14 Commits

Author SHA1 Message Date
Philip Xu aff288b36d
Merge cddc1cb9b8 into 668332b973 2024-05-06 13:45:18 +08:00
dirkf 668332b973 [YouPorn] Add playlist extractors
* YouPornCategoryIE
* YouPornChannelIE
* YouPornCollectionIE
* YouPornStarIE
* YouPornTagIE
* YouPornVideosIE,
2024-04-22 01:34:26 +01:00
dirkf 0b2ce3685e [YouPorn] Improve extraction
* detect unwatchable videos
* improve duration extraction
* fix count extraction and support large values
* detect and remove SEO spam boilerplate description
2024-04-22 01:34:26 +01:00
dirkf c2766cb80e [test/test_download] Support 'playlist_maxcount:count' expected value
* parallel to `playlist_mincount'
* specify both for a range of playlist lengths
* if max < min the test will always fail!
2024-04-22 01:34:26 +01:00
dirkf eb38665438 [YouPorn] Incorporate yt-dlp PR 8827
* from https://github.com/yt-dlp/yt-dlp/pull/8827
* extract from webpage instead of broken API URL
* thx The-MAGI
2024-04-22 01:34:26 +01:00
dirkf e0727e4ab6 [postprocessor/ffmpeg] Fix finding ffprobe (bug in 21792b8)
Fixes 21792b88b7 (commitcomment-140705274), thx: vonProteus
2024-04-07 15:33:30 +01:00
Ori Avtalion 4ea59c6107
[utils] Fix crash in _report_ignoring_subs from c58b655 (#32762)
Align `utils.bug_reports_message()` with yt-dlp https://github.com/yt-dlp/yt-dlp/commit/5873d4ccdd, thanks fstirlitz

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
2024-04-05 15:25:29 +01:00
dirkf 21792b88b7 [external/FFmpeg] Fix and improve --ffmpeg-location handling
* pass YoutubeDL (FileDownloader) to FFmpegPostProcessor constructor
* consolidate path search in FFmpegPostProcessor
* make availability of FFmpegFD depend on existence of FFmpegPostProcessor
* detect ffmpeg executable on instantiation of FFmpegFD
* resolves #32735
2024-03-27 13:11:17 +00:00
dirkf d8f134a664 [downloader/external] Fix "Resource Warning" in downloader test
* add compat_subprocess_Popen context manager
* apply context manager in FFmpegFD._call_downloader()
2024-03-27 13:11:17 +00:00
dirkf 31a15a7c8d [compat] Simplify/fix compat_html_parser_HTMLParseError 2024-03-27 13:11:17 +00:00
dirkf 19dc10b986 [utils] Apply compat_contextlib_suppress 2024-03-27 13:11:17 +00:00
dirkf 182f63e82a [compat] Add compat_contextlib_suppress
with compat_contextlib_suppress(*Exceptions):
    # code that fails silently for any of Exceptions
2024-03-27 13:11:17 +00:00
gy-chen 71211e7db7
[Youtube] Fix unwanted private method __ie_msg in f8b0135850
Fixes `AttributeError no attribute '_YoutubeIE__ie_msg'` if unable to decode n-parameter
2024-03-23 15:30:13 +00:00
Philip Xu cddc1cb9b8 [douyin] Add new extractor 2021-07-12 05:08:56 -04:00
10 changed files with 819 additions and 159 deletions

View File

@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import ( from test.helper import (
assertGreaterEqual, assertGreaterEqual,
assertLessEqual,
expect_warnings, expect_warnings,
get_params, get_params,
gettestcases, gettestcases,
@ -122,7 +123,10 @@ def generator(test_case, tname):
params['outtmpl'] = tname + '_' + params['outtmpl'] params['outtmpl'] = tname + '_' + params['outtmpl']
if is_playlist and 'playlist' not in test_case: if is_playlist and 'playlist' not in test_case:
params.setdefault('extract_flat', 'in_playlist') params.setdefault('extract_flat', 'in_playlist')
params.setdefault('playlistend', test_case.get('playlist_mincount')) params.setdefault('playlistend',
test_case['playlist_maxcount'] + 1
if test_case.get('playlist_maxcount')
else test_case.get('playlist_mincount'))
params.setdefault('skip_download', True) params.setdefault('skip_download', True)
ydl = YoutubeDL(params, auto_init=False) ydl = YoutubeDL(params, auto_init=False)
@ -190,6 +194,14 @@ def generator(test_case, tname):
'Expected at least %d in playlist %s, but got only %d' % ( 'Expected at least %d in playlist %s, but got only %d' % (
test_case['playlist_mincount'], test_case['url'], test_case['playlist_mincount'], test_case['url'],
len(res_dict['entries']))) len(res_dict['entries'])))
if 'playlist_maxcount' in test_case:
assertLessEqual(
self,
len(res_dict['entries']),
test_case['playlist_maxcount'],
'Expected at most %d in playlist %s, but got %d' % (
test_case['playlist_maxcount'], test_case['url'],
len(res_dict['entries'])))
if 'playlist_count' in test_case: if 'playlist_count' in test_case:
self.assertEqual( self.assertEqual(
len(res_dict['entries']), len(res_dict['entries']),

View File

@ -18,6 +18,7 @@ from test.helper import (
) )
from youtube_dl import YoutubeDL from youtube_dl import YoutubeDL
from youtube_dl.compat import ( from youtube_dl.compat import (
compat_contextlib_suppress,
compat_http_cookiejar_Cookie, compat_http_cookiejar_Cookie,
compat_http_server, compat_http_server,
compat_kwargs, compat_kwargs,
@ -35,6 +36,9 @@ from youtube_dl.downloader.external import (
HttpieFD, HttpieFD,
WgetFD, WgetFD,
) )
from youtube_dl.postprocessor import (
FFmpegPostProcessor,
)
import threading import threading
TEST_SIZE = 10 * 1024 TEST_SIZE = 10 * 1024
@ -227,7 +231,17 @@ class TestAria2cFD(unittest.TestCase):
self.assertIn('--load-cookies=%s' % downloader._cookies_tempfile, cmd) self.assertIn('--load-cookies=%s' % downloader._cookies_tempfile, cmd)
@ifExternalFDAvailable(FFmpegFD) # Handle delegated availability
def ifFFmpegFDAvailable(externalFD):
# raise SkipTest, or set False!
avail = ifExternalFDAvailable(externalFD) and False
with compat_contextlib_suppress(Exception):
avail = FFmpegPostProcessor(downloader=None).available
return unittest.skipUnless(
avail, externalFD.get_basename() + ' not found')
@ifFFmpegFDAvailable(FFmpegFD)
class TestFFmpegFD(unittest.TestCase): class TestFFmpegFD(unittest.TestCase):
_args = [] _args = []

View File

@ -2421,29 +2421,26 @@ except ImportError: # Python 2
compat_urllib_request_urlretrieve = compat_urlretrieve compat_urllib_request_urlretrieve = compat_urlretrieve
try: try:
from HTMLParser import (
HTMLParser as compat_HTMLParser,
HTMLParseError as compat_HTMLParseError)
except ImportError: # Python 3
from html.parser import HTMLParser as compat_HTMLParser from html.parser import HTMLParser as compat_HTMLParser
except ImportError: # Python 2
from HTMLParser import HTMLParser as compat_HTMLParser
compat_html_parser_HTMLParser = compat_HTMLParser
try: # Python 2
from HTMLParser import HTMLParseError as compat_HTMLParseError
except ImportError: # Python <3.4
try: try:
from html.parser import HTMLParseError as compat_HTMLParseError from html.parser import HTMLParseError as compat_HTMLParseError
except ImportError: # Python >3.4 except ImportError: # Python >3.4
# HTMLParseError was deprecated in Python 3.3 and removed in
# HTMLParseError has been deprecated in Python 3.3 and removed in
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
# and uniform cross-version exception handling # and uniform cross-version exception handling
class compat_HTMLParseError(Exception): class compat_HTMLParseError(Exception):
pass pass
compat_html_parser_HTMLParser = compat_HTMLParser
compat_html_parser_HTMLParseError = compat_HTMLParseError compat_html_parser_HTMLParseError = compat_HTMLParseError
try: try:
from subprocess import DEVNULL _DEVNULL = subprocess.DEVNULL
compat_subprocess_get_DEVNULL = lambda: DEVNULL compat_subprocess_get_DEVNULL = lambda: _DEVNULL
except ImportError: except AttributeError:
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w') compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
try: try:
@ -2943,6 +2940,51 @@ else:
compat_socket_create_connection = socket.create_connection compat_socket_create_connection = socket.create_connection
try:
from contextlib import suppress as compat_contextlib_suppress
except ImportError:
class compat_contextlib_suppress(object):
_exceptions = None
def __init__(self, *exceptions):
super(compat_contextlib_suppress, self).__init__()
# TODO: [Base]ExceptionGroup (3.12+)
self._exceptions = exceptions
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
return exc_val is not None and isinstance(exc_val, self._exceptions or tuple())
# subprocess.Popen context manager
# avoids leaking handles if .communicate() is not called
try:
_Popen = subprocess.Popen
# check for required context manager attributes
_Popen.__enter__ and _Popen.__exit__
compat_subprocess_Popen = _Popen
except AttributeError:
# not a context manager - make one
from contextlib import contextmanager
@contextmanager
def compat_subprocess_Popen(*args, **kwargs):
popen = None
try:
popen = _Popen(*args, **kwargs)
yield popen
finally:
if popen:
for f in (popen.stdin, popen.stdout, popen.stderr):
if f:
# repeated .close() is OK, but just in case
with compat_contextlib_suppress(EnvironmentError):
f.close()
popen.wait()
# Fix https://github.com/ytdl-org/youtube-dl/issues/4223 # Fix https://github.com/ytdl-org/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken # See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161(): def workaround_optparse_bug9161():
@ -3263,6 +3305,7 @@ __all__ = [
'compat_http_cookiejar_Cookie', 'compat_http_cookiejar_Cookie',
'compat_http_cookies', 'compat_http_cookies',
'compat_http_cookies_SimpleCookie', 'compat_http_cookies_SimpleCookie',
'compat_contextlib_suppress',
'compat_ctypes_WINFUNCTYPE', 'compat_ctypes_WINFUNCTYPE',
'compat_etree_fromstring', 'compat_etree_fromstring',
'compat_filter', 'compat_filter',
@ -3298,6 +3341,7 @@ __all__ = [
'compat_struct_pack', 'compat_struct_pack',
'compat_struct_unpack', 'compat_struct_unpack',
'compat_subprocess_get_DEVNULL', 'compat_subprocess_get_DEVNULL',
'compat_subprocess_Popen',
'compat_tokenize_tokenize', 'compat_tokenize_tokenize',
'compat_urllib_error', 'compat_urllib_error',
'compat_urllib_parse', 'compat_urllib_parse',

View File

@ -11,8 +11,14 @@ from .common import FileDownloader
from ..compat import ( from ..compat import (
compat_setenv, compat_setenv,
compat_str, compat_str,
compat_subprocess_Popen,
) )
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
try:
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
except ImportError:
FFmpegPostProcessor = None
from ..utils import ( from ..utils import (
cli_option, cli_option,
cli_valueless_option, cli_valueless_option,
@ -361,13 +367,14 @@ class FFmpegFD(ExternalFD):
@classmethod @classmethod
def available(cls): def available(cls):
return FFmpegPostProcessor().available # actual availability can only be confirmed for an instance
return bool(FFmpegPostProcessor)
def _call_downloader(self, tmpfilename, info_dict): def _call_downloader(self, tmpfilename, info_dict):
url = info_dict['url'] # `downloader` means the parent `YoutubeDL`
ffpp = FFmpegPostProcessor(downloader=self) ffpp = FFmpegPostProcessor(downloader=self.ydl)
if not ffpp.available: if not ffpp.available:
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') self.report_error('ffmpeg required for download but no ffmpeg (nor avconv) executable could be found. Please install one.')
return False return False
ffpp.check_version() ffpp.check_version()
@ -396,6 +403,7 @@ class FFmpegFD(ExternalFD):
# if end_time: # if end_time:
# args += ['-t', compat_str(end_time - start_time)] # args += ['-t', compat_str(end_time - start_time)]
url = info_dict['url']
cookies = self.ydl.cookiejar.get_cookies_for_url(url) cookies = self.ydl.cookiejar.get_cookies_for_url(url)
if cookies: if cookies:
args.extend(['-cookies', ''.join( args.extend(['-cookies', ''.join(
@ -483,21 +491,25 @@ class FFmpegFD(ExternalFD):
self._debug_cmd(args) self._debug_cmd(args)
proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env) # From [1], a PIPE opened in Popen() should be closed, unless
try: # .communicate() is called. Avoid leaking any PIPEs by using Popen
retval = proc.wait() # as a context manager (newer Python 3.x and compat)
except BaseException as e: # Fixes "Resource Warning" in test/test_downloader_external.py
# subprocess.run would send the SIGKILL signal to ffmpeg and the # [1] https://devpress.csdn.net/python/62fde12d7e66823466192e48.html
# mp4 file couldn't be played, but if we ask ffmpeg to quit it with compat_subprocess_Popen(args, stdin=subprocess.PIPE, env=env) as proc:
# produces a file that is playable (this is mostly useful for live try:
# streams). Note that Windows is not affected and produces playable retval = proc.wait()
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300). except BaseException as e:
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32': # subprocess.run would send the SIGKILL signal to ffmpeg and the
process_communicate_or_kill(proc, b'q') # mp4 file couldn't be played, but if we ask ffmpeg to quit it
else: # produces a file that is playable (this is mostly useful for live
proc.kill() # streams). Note that Windows is not affected and produces playable
proc.wait() # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
raise if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32':
process_communicate_or_kill(proc, b'q')
else:
proc.kill()
raise
return retval return retval

View File

@ -0,0 +1,78 @@
# coding: utf-8
from __future__ import unicode_literals
import json
from .common import (
InfoExtractor,
compat_urllib_parse_unquote,
)
class DouyinIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.douyin.com/video/6961737553342991651',
'md5': 'f0114e6688442972d80fab1083543197',
'info_dict': {
'id': '6961737553342991651',
'ext': 'mp4',
'title': '#杨超越 小小水手带你去远航❤️',
}
}, {
'url': 'https://www.douyin.com/video/6982497745948921092',
'md5': 'bdc8a6b4ce22c887e0064d2813befa27',
'info_dict': {
'id': '6982497745948921092',
'ext': 'mp4',
'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
}
}, {
'url': 'https://www.douyin.com/video/6953975910773099811',
'md5': 'dde3302460f19db59c47060ff013b902',
'info_dict': {
'id': '6953975910773099811',
'ext': 'mp4',
'title': '#一起看海 出现在你的夏日里',
}
}, {
'url': 'https://www.douyin.com/video/6950251282489675042',
'md5': 'f61844399e85f0ff18cfab91d20fe326',
'info_dict': {
'id': '6950251282489675042',
'ext': 'mp4',
'title': '哈哈哈,成功了哈哈哈哈哈哈',
}
}, {
'url': 'https://www.douyin.com/video/6963263655114722595',
'md5': '30a852b6efb232df202b80894f51422d',
'info_dict': {
'id': '6963263655114722595',
'ext': 'mp4',
'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
# The video player and video source are rendered client-side, the data
# contains in a <script id="RENDER_DATA" type="application/json"> tag
# quoted, unquote the whole page content then search using regex with
# regular string.
webpage = compat_urllib_parse_unquote(webpage)
# As of today, this seems good enough to pinpoint the title
title = self._html_search_regex(r'"desc":"([^"]*)"', webpage, 'title')
# video URLs are in this pattern {"src":"THE_URL"}, in json format,
# as a list of CDN urls, all serving the same file
urls = json.loads(
self._html_search_regex(r'"playAddr":(\[.*?\])', webpage, 'urls'))
return {
'id': video_id,
'title': title,
'url': 'https:' + urls[0]['src'],
'ext': 'mp4',
}

View File

@ -334,6 +334,7 @@ from .discoverynetworks import DiscoveryNetworksDeIE
from .discoveryvr import DiscoveryVRIE from .discoveryvr import DiscoveryVRIE
from .disney import DisneyIE from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE from .dispeak import DigitallySpeakingIE
from .douyin import DouyinIE
from .dropbox import DropboxIE from .dropbox import DropboxIE
from .dw import ( from .dw import (
DWIE, DWIE,
@ -1653,7 +1654,15 @@ from .younow import (
YouNowChannelIE, YouNowChannelIE,
YouNowMomentIE, YouNowMomentIE,
) )
from .youporn import YouPornIE from .youporn import (
YouPornIE,
YouPornCategoryIE,
YouPornChannelIE,
YouPornCollectionIE,
YouPornStarIE,
YouPornTagIE,
YouPornVideosIE,
)
from .yourporn import YourPornIE from .yourporn import YourPornIE
from .yourupload import YourUploadIE from .yourupload import YourUploadIE
from .youtube import ( from .youtube import (

View File

@ -1,20 +1,38 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import itertools
import re import re
from time import sleep
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html,
extract_attributes, extract_attributes,
ExtractorError,
get_element_by_class,
get_element_by_id,
int_or_none, int_or_none,
str_to_int, merge_dicts,
parse_count,
parse_qs,
T,
traverse_obj,
unified_strdate, unified_strdate,
url_or_none, url_or_none,
urljoin,
) )
class YouPornIE(InfoExtractor): class YouPornIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?' _VALID_URL = (
r'youporn:(?P<id>\d+)',
r'''(?x)
https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)
(?:/(?:(?P<display_id>[^/?#&]+)/?)?)?(?:[#?]|$)
'''
)
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)']
_TESTS = [{ _TESTS = [{
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
'md5': '3744d24c50438cf5b6f6d59feb5055c2', 'md5': '3744d24c50438cf5b6f6d59feb5055c2',
@ -34,7 +52,7 @@ class YouPornIE(InfoExtractor):
'tags': list, 'tags': list,
'age_limit': 18, 'age_limit': 18,
}, },
'skip': 'This video has been disabled', 'skip': 'This video has been deactivated',
}, { }, {
# Unknown uploader # Unknown uploader
'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4', 'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
@ -66,57 +84,104 @@ class YouPornIE(InfoExtractor):
}, { }, {
'url': 'https://www.youporn.com/watch/13922959/femdom-principal/', 'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.youporn.com/watch/16290308/tinderspecial-trailer1/',
'info_dict': {
'id': '16290308',
'age_limit': 18,
'categories': [],
'description': None, # SEO spam using title removed
'display_id': 'tinderspecial-trailer1',
'duration': 298.0,
'ext': 'mp4',
'upload_date': '20201123',
'uploader': 'Ersties',
'tags': [],
'thumbnail': 'https://fi1.ypncdn.com/m=eaSaaTbWx/202011/23/16290308/original/3.jpg',
'timestamp': 1606147564,
'title': 'Tinder In Real Life',
'view_count': int,
}
}] }]
@staticmethod @classmethod
def _extract_urls(webpage): def _extract_urls(cls, webpage):
return re.findall( def yield_urls():
r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)', for p in cls._EMBED_REGEX:
webpage) for m in re.finditer(p, webpage):
yield m.group('url')
return list(yield_urls())
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) # A different video ID (data-video-id) is hidden in the page but
video_id = mobj.group('id') # never seems to be used
display_id = mobj.group('display_id') or video_id video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
url = 'http://www.youporn.com/watch/%s' % (video_id,)
webpage = self._download_webpage(
url, video_id, headers={'Cookie': 'age_verified=1'})
definitions = self._download_json( watchable = self._search_regex(
'https://www.youporn.com/api/video/media_definitions/%s/' % video_id, r'''(<div\s[^>]*\bid\s*=\s*('|")?watch-container(?(2)\2|(?!-)\b)[^>]*>)''',
display_id) webpage, 'watchability', default=None)
if not watchable:
msg = re.split(r'\s{4}', clean_html(get_element_by_id(
'mainContent', webpage)) or '')[0]
raise ExtractorError(
('%s says: %s' % (self.IE_NAME, msg))
if msg else 'Video unavailable: no reason found',
expected=True)
# internal ID ?
# video_id = extract_attributes(watchable).get('data-video-id')
playervars = self._search_json(
r'\bplayervars\s*:', webpage, 'playervars', video_id)
def get_fmt(x):
v_url = url_or_none(x.get('videoUrl'))
if v_url:
x['videoUrl'] = v_url
return (x['format'], x)
defs_by_format = dict(traverse_obj(playervars, (
'mediaDefinitions', lambda _, v: v.get('format'), T(get_fmt))))
def get_format_data(f):
if f not in defs_by_format:
return []
return self._download_json(
defs_by_format[f]['videoUrl'], video_id, '{0}-formats'.format(f))
formats = [] formats = []
for definition in definitions: # Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
if not isinstance(definition, dict): for hls_url in traverse_obj(
continue get_format_data('hls'),
video_url = url_or_none(definition.get('videoUrl')) (lambda _, v: not isinstance(v['defaultQuality'], bool), 'videoUrl'),
if not video_url: (Ellipsis, 'videoUrl')):
continue formats.extend(self._extract_m3u8_formats(
f = { hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls',
'url': video_url, entry_protocol='m3u8_native'))
'filesize': int_or_none(definition.get('videoSize')),
} for f in traverse_obj(get_format_data('mp4'), (
height = int_or_none(definition.get('quality')) lambda _, v: v.get('videoUrl'), {
'url': ('videoUrl', T(url_or_none)),
'filesize': ('videoSize', T(int_or_none)),
'height': ('quality', T(int_or_none)),
}, T(lambda x: x.get('videoUrl') and x))):
# Video URL's path looks like this: # Video URL's path looks like this:
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 # /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 # /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4 # /videos/201703/11/109285532/1080P_4000K_109285532.mp4
# We will benefit from it by extracting some metadata # We will benefit from it by extracting some metadata
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url) mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', f['videoUrl'])
if mobj: if mobj:
if not height: if not f.get('height'):
height = int(mobj.group('height')) f['height'] = int(mobj.group('height'))
bitrate = int(mobj.group('bitrate')) f['tbr'] = int(mobj.group('bitrate'))
f.update({ f['format_id'] = '%dp-%dk' % (f['height'], f['tbr'])
'format_id': '%dp-%dk' % (height, bitrate),
'tbr': bitrate,
})
f['height'] = height
formats.append(f) formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
webpage = self._download_webpage(
'http://www.youporn.com/watch/%s' % video_id, display_id,
headers={'Cookie': 'age_verified=1'})
title = self._html_search_regex( title = self._html_search_regex(
r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>', r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
webpage, 'title', default=None) or self._og_search_title( webpage, 'title', default=None) or self._og_search_title(
@ -131,8 +196,10 @@ class YouPornIE(InfoExtractor):
thumbnail = self._search_regex( thumbnail = self._search_regex(
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1', r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
webpage, 'thumbnail', fatal=False, group='thumbnail') webpage, 'thumbnail', fatal=False, group='thumbnail')
duration = int_or_none(self._html_search_meta( duration = traverse_obj(playervars, ('duration', T(int_or_none)))
'video:duration', webpage, 'duration', fatal=False)) if duration is None:
duration = int_or_none(self._html_search_meta(
'video:duration', webpage, 'duration', fatal=False))
uploader = self._html_search_regex( uploader = self._html_search_regex(
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>', r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
@ -148,11 +215,11 @@ class YouPornIE(InfoExtractor):
view_count = None view_count = None
views = self._search_regex( views = self._search_regex(
r'(<div[^>]+\bclass=["\']js_videoInfoViews["\']>)', webpage, r'(<div\s[^>]*\bdata-value\s*=[^>]+>)\s*<label>Views:</label>',
'views', default=None) webpage, 'views', default=None)
if views: if views:
view_count = str_to_int(extract_attributes(views).get('data-value')) view_count = parse_count(extract_attributes(views).get('data-value'))
comment_count = str_to_int(self._search_regex( comment_count = parse_count(self._search_regex(
r'>All [Cc]omments? \(([\d,.]+)\)', r'>All [Cc]omments? \(([\d,.]+)\)',
webpage, 'comment count', default=None)) webpage, 'comment count', default=None))
@ -168,7 +235,10 @@ class YouPornIE(InfoExtractor):
r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>', r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>',
'tags') 'tags')
return { data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False) or {}
data.pop('url', None)
result = merge_dicts(data, {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
@ -183,4 +253,442 @@ class YouPornIE(InfoExtractor):
'tags': tags, 'tags': tags,
'age_limit': age_limit, 'age_limit': age_limit,
'formats': formats, 'formats': formats,
} })
# Remove promotional non-description
if result.get('description', '').startswith(
'Watch %s online' % (result['title'],)):
del result['description']
return result
class YouPornListBase(InfoExtractor):
# pattern in '.title-text' element of page section containing videos
_PLAYLIST_TITLEBAR_RE = r'\s+[Vv]ideos\s*$'
_PAGE_RETRY_COUNT = 0 # ie, no retry
_PAGE_RETRY_DELAY = 2 # seconds
def _get_next_url(self, url, pl_id, html):
return urljoin(url, self._search_regex(
r'''<a\s[^>]*?\bhref\s*=\s*("|')(?P<url>(?:(?!\1)[^>])+)\1''',
get_element_by_id('next', html) or '', 'next page',
group='url', default=None))
@classmethod
def _get_title_from_slug(cls, title_slug):
return re.sub(r'[_-]', ' ', title_slug)
def _entries(self, url, pl_id, html=None, page_num=None):
# separates page sections
PLAYLIST_SECTION_RE = (
r'''<div\s[^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?title-bar(?:\s+[\w$-]+|\s)*\1[^>]*>'''
)
# contains video link
VIDEO_URL_RE = r'''(?x)
<div\s[^>]*\bdata-video-id\s*=\s*('|")\d+\1[^>]*>\s*
(?:<div\b[\s\S]+?</div>\s*)*
<a\s[^>]*\bhref\s*=\s*('|")(?P<url>(?:(?!\2)[^>])+)\2
'''
def yield_pages(url, html=html, page_num=page_num):
fatal = not html
for pnum in itertools.count(start=page_num or 1):
if not html:
html = self._download_webpage(
url, pl_id, note='Downloading page %d' % pnum,
fatal=fatal)
if not html:
break
fatal = False
yield (url, html, pnum)
# explicit page: extract just that page
if page_num is not None:
break
next_url = self._get_next_url(url, pl_id, html)
if not next_url or next_url == url:
break
url, html = next_url, None
def retry_page(msg, tries_left, page_data):
if tries_left <= 0:
return
self.report_warning(msg, pl_id)
sleep(self._PAGE_RETRY_DELAY)
return next(
yield_pages(page_data[0], page_num=page_data[2]), None)
def yield_entries(html):
for frag in re.split(PLAYLIST_SECTION_RE, html):
if not frag:
continue
t_text = get_element_by_class('title-text', frag or '')
if not (t_text and re.search(self._PLAYLIST_TITLEBAR_RE, t_text)):
continue
for m in re.finditer(VIDEO_URL_RE, frag):
video_url = urljoin(url, m.group('url'))
if video_url:
yield self.url_result(video_url)
last_first_url = None
for page_data in yield_pages(url, html=html, page_num=page_num):
# page_data: url, html, page_num
first_url = None
tries_left = self._PAGE_RETRY_COUNT + 1
while tries_left > 0:
tries_left -= 1
for from_ in yield_entries(page_data[1]):
# may get the same page twice instead of empty page
# or (site bug) intead of actual next page
if not first_url:
first_url = from_['url']
if first_url == last_first_url:
# sometimes (/porntags/) the site serves the previous page
# instead but may provide the correct page after a delay
page_data = retry_page(
'Retrying duplicate page...', tries_left, page_data)
if page_data:
first_url = None
break
continue
yield from_
else:
if not first_url and 'no-result-paragarph1' in page_data[1]:
page_data = retry_page(
'Retrying empty page...', tries_left, page_data)
if page_data:
continue
else:
# success/failure
break
# may get an infinite (?) sequence of empty pages
if not first_url:
break
last_first_url = first_url
def _real_extract(self, url, html=None):
# exceptionally, id may be None
m_dict = self._match_valid_url(url).groupdict()
pl_id, page_type, sort = (m_dict.get(k) for k in ('id', 'type', 'sort'))
qs = parse_qs(url)
for q, v in qs.items():
if v:
qs[q] = v[-1]
else:
del qs[q]
base_id = pl_id or 'YouPorn'
title = self._get_title_from_slug(base_id)
if page_type:
title = '%s %s' % (page_type.capitalize(), title)
base_id = [base_id.lower()]
if sort is None:
title += ' videos'
else:
title = '%s videos by %s' % (title, re.sub(r'[_-]', ' ', sort))
base_id.append(sort)
if qs:
ps = ['%s=%s' % item for item in sorted(qs.items())]
title += ' (%s)' % ','.join(ps)
base_id.extend(ps)
pl_id = '/'.join(base_id)
return self.playlist_result(
self._entries(url, pl_id, html=html,
page_num=int_or_none(qs.get('page'))),
playlist_id=pl_id, playlist_title=title)
class YouPornCategoryIE(YouPornListBase):
IE_DESC = 'YouPorn category, with sorting, filtering and pagination'
_VALID_URL = r'''(?x)
https?://(?:www\.)?youporn\.com/
(?P<type>category)/(?P<id>[^/?#&]+)
(?:/(?P<sort>popular|views|rating|time|duration))?/?(?:[#?]|$)
'''
_TESTS = [{
'note': 'Full list with pagination',
'url': 'https://www.youporn.com/category/lingerie/popular/',
'info_dict': {
'id': 'lingerie/popular',
'title': 'Category lingerie videos by popular',
},
'playlist_mincount': 39,
}, {
'note': 'Filtered paginated list with single page result',
'url': 'https://www.youporn.com/category/lingerie/duration/?min_minutes=10',
'info_dict': {
'id': 'lingerie/duration/min_minutes=10',
'title': 'Category lingerie videos by duration (min_minutes=10)',
},
'playlist_maxcount': 30,
}, {
'note': 'Single page of full list',
'url': 'https://www.youporn.com/category/lingerie/popular?page=1',
'info_dict': {
'id': 'lingerie/popular/page=1',
'title': 'Category lingerie videos by popular (page=1)',
},
'playlist_count': 30,
}]
class YouPornChannelIE(YouPornListBase):
IE_DESC = 'YouPorn channel, with sorting and pagination'
_VALID_URL = r'''(?x)
https?://(?:www\.)?youporn\.com/
(?P<type>channel)/(?P<id>[^/?#&]+)
(?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$)
'''
_TESTS = [{
'note': 'Full list with pagination',
'url': 'https://www.youporn.com/channel/x-feeds/',
'info_dict': {
'id': 'x-feeds',
'title': 'Channel X-Feeds videos',
},
'playlist_mincount': 37,
}, {
'note': 'Single page of full list (no filters here)',
'url': 'https://www.youporn.com/channel/x-feeds/duration?page=1',
'info_dict': {
'id': 'x-feeds/duration/page=1',
'title': 'Channel X-Feeds videos by duration (page=1)',
},
'playlist_count': 24,
}]
@staticmethod
def _get_title_from_slug(title_slug):
return re.sub(r'_', ' ', title_slug).title()
class YouPornCollectionIE(YouPornListBase):
IE_DESC = 'YouPorn collection (user playlist), with sorting and pagination'
_VALID_URL = r'''(?x)
https?://(?:www\.)?youporn\.com/
(?P<type>collection)s/videos/(?P<id>\d+)
(?:/(?P<sort>rating|views|time|duration))?/?(?:[#?]|$)
'''
_PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+in\s'
_TESTS = [{
'note': 'Full list with pagination',
'url': 'https://www.youporn.com/collections/videos/33044251/',
'info_dict': {
'id': '33044251',
'title': 'Collection Sexy Lips videos',
'uploader': 'ph-littlewillyb',
},
'playlist_mincount': 50,
}, {
'note': 'Single page of full list (no filters here)',
'url': 'https://www.youporn.com/collections/videos/33044251/time?page=1',
'info_dict': {
'id': '33044251/time/page=1',
'title': 'Collection Sexy Lips videos by time (page=1)',
'uploader': 'ph-littlewillyb',
},
'playlist_count': 20,
}]
def _real_extract(self, url):
pl_id = self._match_id(url)
html = self._download_webpage(url, pl_id)
playlist = super(YouPornCollectionIE, self)._real_extract(url, html=html)
infos = re.sub(r'\s+', ' ', clean_html(get_element_by_class(
'collection-infos', html)) or '')
title, uploader = self._search_regex(
r'^\s*Collection: (?P<title>.+?) \d+ VIDEOS \d+ VIEWS \d+ days LAST UPDATED From: (?P<uploader>[\w_-]+)',
infos, 'title/uploader', group=('title', 'uploader'), default=(None, None))
return merge_dicts({
'title': playlist['title'].replace(playlist['id'].split('/')[0], title),
'uploader': uploader,
}, playlist) if title else playlist
class YouPornTagIE(YouPornListBase):
IE_DESC = 'YouPorn tag (porntags), with sorting, filtering and pagination'
_VALID_URL = r'''(?x)
https?://(?:www\.)?youporn\.com/
porn(?P<type>tag)s/(?P<id>[^/?#&]+)
(?:/(?P<sort>views|rating|time|duration))?/?(?:[#?]|$)
'''
_PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+tagged\s'
_PAGE_RETRY_COUNT = 1
_TESTS = [{
'note': 'Full list with pagination',
'url': 'https://www.youporn.com/porntags/austrian',
'info_dict': {
'id': 'austrian',
'title': 'Tag austrian videos',
},
'playlist_mincount': 35,
'expected_warnings': ['Retrying duplicate page'],
}, {
'note': 'Filtered paginated list with single page result',
'url': 'https://www.youporn.com/porntags/austrian/duration/?min_minutes=10',
'info_dict': {
'id': 'austrian/duration/min_minutes=10',
'title': 'Tag austrian videos by duration (min_minutes=10)',
},
# number of videos per page is (row x col) 2x3 + 6x4 + 2, or + 3,
# or more, varying with number of ads; let's set max as 9x4
# NB col 1 may not be shown in non-JS page with site CSS and zoom 100%
'playlist_maxcount': 32,
'expected_warnings': ['Retrying duplicate page', 'Retrying empty page'],
}, {
'note': 'Single page of full list',
'url': 'https://www.youporn.com/porntags/austrian/?page=1',
'info_dict': {
'id': 'austrian/page=1',
'title': 'Tag austrian videos (page=1)',
},
'playlist_mincount': 32,
'playlist_maxcount': 34,
'expected_warnings': ['Retrying duplicate page', 'Retrying empty page'],
}]
# YP tag navigation is broken, loses sort
def _get_next_url(self, url, pl_id, html):
next_url = super(YouPornTagIE, self)._get_next_url(url, pl_id, html)
if next_url:
n = self._match_valid_url(next_url)
if n:
s = n.groupdict().get('sort')
if s:
u = self._match_valid_url(url)
if u:
u = u.groupdict().get('sort')
if s and not u:
n = n.end('sort')
next_url = next_url[:n] + '/' + u + next_url[n:]
return next_url
class YouPornStarIE(YouPornListBase):
IE_DESC = 'YouPorn Pornstar, with description, sorting and pagination'
_VALID_URL = r'''(?x)
https?://(?:www\.)?youporn\.com/
(?P<type>pornstar)/(?P<id>[^/?#&]+)
(?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$)
'''
_PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+[fF]eaturing\s'
_TESTS = [{
'note': 'Full list with pagination',
'url': 'https://www.youporn.com/pornstar/daynia/',
'info_dict': {
'id': 'daynia',
'title': 'Pornstar Daynia videos',
'description': r're:Daynia Rank \d+ Videos \d+ Views [\d,.]+ .+ Subscribers \d+',
},
'playlist_mincount': 45,
}, {
'note': 'Single page of full list (no filters here)',
'url': 'https://www.youporn.com/pornstar/daynia/?page=1',
'info_dict': {
'id': 'daynia/page=1',
'title': 'Pornstar Daynia videos (page=1)',
'description': 're:.{180,}',
},
'playlist_count': 26,
}]
@staticmethod
def _get_title_from_slug(title_slug):
return re.sub(r'_', ' ', title_slug).title()
def _real_extract(self, url):
pl_id = self._match_id(url)
html = self._download_webpage(url, pl_id)
playlist = super(YouPornStarIE, self)._real_extract(url, html=html)
INFO_ELEMENT_RE = r'''(?x)
<div\s[^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?pornstar-info-wrapper(?:\s+[\w$-]+|\s)*\1[^>]*>
(?P<info>[\s\S]+?)(?:</div>\s*){6,}
'''
infos = self._search_regex(INFO_ELEMENT_RE, html, 'infos', group='info', default='')
if infos:
infos = re.sub(
r'(?:\s*nl=nl)+\s*', ' ',
re.sub(r'(?u)\s+', ' ', clean_html(
re.sub('\n', 'nl=nl', infos)))).replace('ribe Subsc', '')
return merge_dicts({
'description': infos.strip() or None,
}, playlist)
class YouPornVideosIE(YouPornListBase):
IE_DESC = 'YouPorn video (browse) playlists, with sorting, filtering and pagination'
_VALID_URL = r'''(?x)
https?://(?:www\.)?youporn\.com/
(?:(?P<id>browse)/)?
(?P<sort>(?(id)
(?:duration|rating|time|views)|
(?:most_(?:favou?rit|view)ed|recommended|top_rated)?))
(?:[/#?]|$)
'''
_PLAYLIST_TITLEBAR_RE = r'\s+(?:[Vv]ideos|VIDEOS)\s*$'
_TESTS = [{
'note': 'Full list with pagination (too long for test)',
'url': 'https://www.youporn.com/',
'info_dict': {
'id': 'youporn',
'title': 'YouPorn videos',
},
'only_matching': True,
}, {
'note': 'Full list with pagination (too long for test)',
'url': 'https://www.youporn.com/recommended',
'info_dict': {
'id': 'youporn/recommended',
'title': 'YouPorn videos by recommended',
},
'only_matching': True,
}, {
'note': 'Full list with pagination (too long for test)',
'url': 'https://www.youporn.com/top_rated',
'info_dict': {
'id': 'youporn/top_rated',
'title': 'YouPorn videos by top rated',
},
'only_matching': True,
}, {
'note': 'Full list with pagination (too long for test)',
'url': 'https://www.youporn.com/browse/time',
'info_dict': {
'id': 'browse/time',
'title': 'YouPorn videos by time',
},
'only_matching': True,
}, {
'note': 'Filtered paginated list with single page result',
'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=2',
'info_dict': {
'id': 'youporn/most_favorited/max_minutes=2/res=VR',
'title': 'YouPorn videos by most favorited (max_minutes=2,res=VR)',
},
'playlist_mincount': 10,
'playlist_maxcount': 28,
}, {
'note': 'Filtered paginated list with several pages',
'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=5',
'info_dict': {
'id': 'youporn/most_favorited/max_minutes=5/res=VR',
'title': 'YouPorn videos by most favorited (max_minutes=5,res=VR)',
},
'playlist_mincount': 45,
}, {
'note': 'Single page of full list',
'url': 'https://www.youporn.com/browse/time?page=1',
'info_dict': {
'id': 'browse/time/page=1',
'title': 'YouPorn videos by time (page=1)',
},
'playlist_count': 36,
}]
@staticmethod
def _get_title_from_slug(title_slug):
return 'YouPorn' if title_slug == 'browse' else title_slug

View File

@ -1647,10 +1647,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
except JSInterpreter.Exception as e: except JSInterpreter.Exception as e:
self.report_warning( self.report_warning(
'%s (%s %s)' % ( '%s (%s %s)' % (
self.__ie_msg( 'Unable to decode n-parameter: download likely to be throttled',
'Unable to decode n-parameter: download likely to be throttled'),
error_to_compat_str(e), error_to_compat_str(e),
traceback.format_exc())) traceback.format_exc()),
video_id=video_id)
return return
self.write_debug('Decrypted nsig {0} => {1}'.format(n, ret)) self.write_debug('Decrypted nsig {0} => {1}'.format(n, ret))

View File

@ -74,8 +74,11 @@ class FFmpegPostProcessor(PostProcessor):
return FFmpegPostProcessor(downloader)._versions return FFmpegPostProcessor(downloader)._versions
def _determine_executables(self): def _determine_executables(self):
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] # ordered to match prefer_ffmpeg!
convs = ['ffmpeg', 'avconv']
probes = ['ffprobe', 'avprobe']
prefer_ffmpeg = True prefer_ffmpeg = True
programs = convs + probes
def get_ffmpeg_version(path): def get_ffmpeg_version(path):
ver = get_exe_version(path, args=['-version']) ver = get_exe_version(path, args=['-version'])
@ -96,6 +99,7 @@ class FFmpegPostProcessor(PostProcessor):
self._paths = None self._paths = None
self._versions = None self._versions = None
location = None
if self._downloader: if self._downloader:
prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True) prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True)
location = self._downloader.params.get('ffmpeg_location') location = self._downloader.params.get('ffmpeg_location')
@ -118,33 +122,21 @@ class FFmpegPostProcessor(PostProcessor):
location = os.path.dirname(os.path.abspath(location)) location = os.path.dirname(os.path.abspath(location))
if basename in ('ffmpeg', 'ffprobe'): if basename in ('ffmpeg', 'ffprobe'):
prefer_ffmpeg = True prefer_ffmpeg = True
self._paths = dict(
(p, p if location is None else os.path.join(location, p))
for p in programs)
self._versions = dict(
x for x in (
(p, get_ffmpeg_version(self._paths[p])) for p in programs)
if x[1] is not None)
self._paths = dict( basenames = [None, None]
(p, os.path.join(location, p)) for p in programs) for i, progs in enumerate((convs, probes)):
self._versions = dict( for p in progs[::-1 if prefer_ffmpeg is False else 1]:
(p, get_ffmpeg_version(self._paths[p])) for p in programs) if self._versions.get(p):
if self._versions is None: basenames[i] = p
self._versions = dict( break
(p, get_ffmpeg_version(p)) for p in programs) self.basename, self.probe_basename = basenames
self._paths = dict((p, p) for p in programs)
if prefer_ffmpeg is False:
prefs = ('avconv', 'ffmpeg')
else:
prefs = ('ffmpeg', 'avconv')
for p in prefs:
if self._versions[p]:
self.basename = p
break
if prefer_ffmpeg is False:
prefs = ('avprobe', 'ffprobe')
else:
prefs = ('ffprobe', 'avprobe')
for p in prefs:
if self._versions[p]:
self.probe_basename = p
break
@property @property
def available(self): def available(self):

View File

@ -45,6 +45,7 @@ from .compat import (
compat_casefold, compat_casefold,
compat_chr, compat_chr,
compat_collections_abc, compat_collections_abc,
compat_contextlib_suppress,
compat_cookiejar, compat_cookiejar,
compat_ctypes_WINFUNCTYPE, compat_ctypes_WINFUNCTYPE,
compat_datetime_timedelta_total_seconds, compat_datetime_timedelta_total_seconds,
@ -1855,25 +1856,18 @@ def write_json_file(obj, fn):
try: try:
with tf: with tf:
json.dump(obj, tf) json.dump(obj, tf)
if sys.platform == 'win32': with compat_contextlib_suppress(OSError):
# Need to remove existing file on Windows, else os.rename raises if sys.platform == 'win32':
# WindowsError or FileExistsError. # Need to remove existing file on Windows, else os.rename raises
try: # WindowsError or FileExistsError.
os.unlink(fn) os.unlink(fn)
except OSError:
pass
try:
mask = os.umask(0) mask = os.umask(0)
os.umask(mask) os.umask(mask)
os.chmod(tf.name, 0o666 & ~mask) os.chmod(tf.name, 0o666 & ~mask)
except OSError:
pass
os.rename(tf.name, fn) os.rename(tf.name, fn)
except Exception: except Exception:
try: with compat_contextlib_suppress(OSError):
os.remove(tf.name) os.remove(tf.name)
except OSError:
pass
raise raise
@ -2033,14 +2027,13 @@ def extract_attributes(html_element):
NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions, NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
""" """
parser = HTMLAttributeParser() ret = None
try: # Older Python may throw HTMLParseError in case of malformed HTML (and on .close()!)
parser.feed(html_element) with compat_contextlib_suppress(compat_HTMLParseError):
parser.close() with contextlib.closing(HTMLAttributeParser()) as parser:
# Older Python may throw HTMLParseError in case of malformed HTML parser.feed(html_element)
except compat_HTMLParseError: ret = parser.attrs
pass return ret or {}
return parser.attrs
def clean_html(html): def clean_html(html):
@ -2241,7 +2234,8 @@ def _htmlentity_transform(entity_with_semicolon):
numstr = '0%s' % numstr numstr = '0%s' % numstr
else: else:
base = 10 base = 10
# See https://github.com/ytdl-org/youtube-dl/issues/7518 # See https://github.com/ytdl-org/youtube-dl/issues/7518\
# Also, weirdly, compat_contextlib_suppress fails here in 2.6
try: try:
return compat_chr(int(numstr, base)) return compat_chr(int(numstr, base))
except ValueError: except ValueError:
@ -2348,11 +2342,9 @@ def make_HTTPS_handler(params, **kwargs):
# Some servers may (wrongly) reject requests if ALPN extension is not sent. See: # Some servers may (wrongly) reject requests if ALPN extension is not sent. See:
# https://github.com/python/cpython/issues/85140 # https://github.com/python/cpython/issues/85140
# https://github.com/yt-dlp/yt-dlp/issues/3878 # https://github.com/yt-dlp/yt-dlp/issues/3878
try: with compat_contextlib_suppress(AttributeError, NotImplementedError):
# fails for Python < 2.7.10, not ssl.HAS_ALPN
ctx.set_alpn_protocols(ALPN_PROTOCOLS) ctx.set_alpn_protocols(ALPN_PROTOCOLS)
except (AttributeError, NotImplementedError):
# Python < 2.7.10, not ssl.HAS_ALPN
pass
opts_no_check_certificate = params.get('nocheckcertificate', False) opts_no_check_certificate = params.get('nocheckcertificate', False)
if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9 if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
@ -2362,12 +2354,10 @@ def make_HTTPS_handler(params, **kwargs):
context.check_hostname = False context.check_hostname = False
context.verify_mode = ssl.CERT_NONE context.verify_mode = ssl.CERT_NONE
try: with compat_contextlib_suppress(TypeError):
# Fails with Python 2.7.8 (create_default_context present
# but HTTPSHandler has no context=)
return YoutubeDLHTTPSHandler(params, context=context, **kwargs) return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
except TypeError:
# Python 2.7.8
# (create_default_context present but HTTPSHandler has no context=)
pass
if sys.version_info < (3, 2): if sys.version_info < (3, 2):
return YoutubeDLHTTPSHandler(params, **kwargs) return YoutubeDLHTTPSHandler(params, **kwargs)
@ -2381,15 +2371,24 @@ def make_HTTPS_handler(params, **kwargs):
return YoutubeDLHTTPSHandler(params, context=context, **kwargs) return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
def bug_reports_message(): def bug_reports_message(before=';'):
if ytdl_is_updateable(): if ytdl_is_updateable():
update_cmd = 'type youtube-dl -U to update' update_cmd = 'type youtube-dl -U to update'
else: else:
update_cmd = 'see https://yt-dl.org/update on how to update' update_cmd = 'see https://github.com/ytdl-org/youtube-dl/#user-content-installation on how to update'
msg = '; please report this issue on https://yt-dl.org/bug .'
msg += ' Make sure you are using the latest version; %s.' % update_cmd msg = (
msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' 'please report this issue on https://github.com/ytdl-org/youtube-dl/issues ,'
return msg ' using the appropriate issue template.'
' Make sure you are using the latest version; %s.'
' Be sure to call youtube-dl with the --verbose option and include the complete output.'
) % update_cmd
before = (before or '').rstrip()
if not before or before.endswith(('.', '!', '?')):
msg = msg[0].title() + msg[1:]
return (before + ' ' if before else '') + msg
class YoutubeDLError(Exception): class YoutubeDLError(Exception):
@ -3176,12 +3175,10 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
if timezone is None: if timezone is None:
timezone, date_str = extract_timezone(date_str) timezone, date_str = extract_timezone(date_str)
try: with compat_contextlib_suppress(ValueError):
date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
dt = datetime.datetime.strptime(date_str, date_format) - timezone dt = datetime.datetime.strptime(date_str, date_format) - timezone
return calendar.timegm(dt.timetuple()) return calendar.timegm(dt.timetuple())
except ValueError:
pass
def date_formats(day_first=True): def date_formats(day_first=True):
@ -3201,17 +3198,13 @@ def unified_strdate(date_str, day_first=True):
_, date_str = extract_timezone(date_str) _, date_str = extract_timezone(date_str)
for expression in date_formats(day_first): for expression in date_formats(day_first):
try: with compat_contextlib_suppress(ValueError):
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
except ValueError:
pass
if upload_date is None: if upload_date is None:
timetuple = email.utils.parsedate_tz(date_str) timetuple = email.utils.parsedate_tz(date_str)
if timetuple: if timetuple:
try: with compat_contextlib_suppress(ValueError):
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
except ValueError:
pass
if upload_date is not None: if upload_date is not None:
return compat_str(upload_date) return compat_str(upload_date)
@ -3240,11 +3233,9 @@ def unified_timestamp(date_str, day_first=True):
date_str = m.group(1) date_str = m.group(1)
for expression in date_formats(day_first): for expression in date_formats(day_first):
try: with compat_contextlib_suppress(ValueError):
dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta) dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
return calendar.timegm(dt.timetuple()) return calendar.timegm(dt.timetuple())
except ValueError:
pass
timetuple = email.utils.parsedate_tz(date_str) timetuple = email.utils.parsedate_tz(date_str)
if timetuple: if timetuple:
return calendar.timegm(timetuple) + pm_delta * 3600 - compat_datetime_timedelta_total_seconds(timezone) return calendar.timegm(timetuple) + pm_delta * 3600 - compat_datetime_timedelta_total_seconds(timezone)