Compare commits

...

13 Commits

Author SHA1 Message Date
Tobias Bell b41bfb22f0
Merge 8c8b07c4e9 into 668332b973 2024-04-24 10:07:56 -03:00
dirkf 668332b973 [YouPorn] Add playlist extractors
* YouPornCategoryIE
* YouPornChannelIE
* YouPornCollectionIE
* YouPornStarIE
* YouPornTagIE
* YouPornVideosIE,
2024-04-22 01:34:26 +01:00
dirkf 0b2ce3685e [YouPorn] Improve extraction
* detect unwatchable videos
* improve duration extraction
* fix count extraction and support large values
* detect and remove SEO spam boilerplate description
2024-04-22 01:34:26 +01:00
dirkf c2766cb80e [test/test_download] Support 'playlist_maxcount:count' expected value
* parallel to `playlist_mincount'
* specify both for a range of playlist lengths
* if max < min the test will always fail!
2024-04-22 01:34:26 +01:00
dirkf eb38665438 [YouPorn] Incorporate yt-dlp PR 8827
* from https://github.com/yt-dlp/yt-dlp/pull/8827
* extract from webpage instead of broken API URL
* thx The-MAGI
2024-04-22 01:34:26 +01:00
dirkf e0727e4ab6 [postprocessor/ffmpeg] Fix finding ffprobe (bug in 21792b8)
Fixes 21792b88b7 (commitcomment-140705274), thx: vonProteus
2024-04-07 15:33:30 +01:00
Ori Avtalion 4ea59c6107
[utils] Fix crash in _report_ignoring_subs from c58b655 (#32762)
Align `utils.bug_reports_message()` with yt-dlp https://github.com/yt-dlp/yt-dlp/commit/5873d4ccdd, thanks fstirlitz

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
2024-04-05 15:25:29 +01:00
dirkf 8c8b07c4e9
Small update, force CI 2023-02-03 04:11:27 +00:00
Tobias Bell 7b21482907 Should work now 2022-08-30 16:46:22 +02:00
Tobias Bell 325427ce17 In between 2022-08-30 13:34:55 +02:00
Tobias Bell 48f020cb9a
Apply suggestions from code review
Co-authored-by: dirkf <fieldhouse@gmx.net>
2022-08-30 12:58:39 +02:00
Tobias Bell 25fcfdf566 Remove unnecessary import 2022-08-29 11:58:14 +02:00
Tobias Bell 907a09cbf7 [w24at] Add new extractor 2022-08-29 11:56:06 +02:00
6 changed files with 648 additions and 59 deletions

View File

@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import ( from test.helper import (
assertGreaterEqual, assertGreaterEqual,
assertLessEqual,
expect_warnings, expect_warnings,
get_params, get_params,
gettestcases, gettestcases,
@ -122,7 +123,10 @@ def generator(test_case, tname):
params['outtmpl'] = tname + '_' + params['outtmpl'] params['outtmpl'] = tname + '_' + params['outtmpl']
if is_playlist and 'playlist' not in test_case: if is_playlist and 'playlist' not in test_case:
params.setdefault('extract_flat', 'in_playlist') params.setdefault('extract_flat', 'in_playlist')
params.setdefault('playlistend', test_case.get('playlist_mincount')) params.setdefault('playlistend',
test_case['playlist_maxcount'] + 1
if test_case.get('playlist_maxcount')
else test_case.get('playlist_mincount'))
params.setdefault('skip_download', True) params.setdefault('skip_download', True)
ydl = YoutubeDL(params, auto_init=False) ydl = YoutubeDL(params, auto_init=False)
@ -190,6 +194,14 @@ def generator(test_case, tname):
'Expected at least %d in playlist %s, but got only %d' % ( 'Expected at least %d in playlist %s, but got only %d' % (
test_case['playlist_mincount'], test_case['url'], test_case['playlist_mincount'], test_case['url'],
len(res_dict['entries']))) len(res_dict['entries'])))
if 'playlist_maxcount' in test_case:
assertLessEqual(
self,
len(res_dict['entries']),
test_case['playlist_maxcount'],
'Expected at most %d in playlist %s, but got %d' % (
test_case['playlist_maxcount'], test_case['url'],
len(res_dict['entries'])))
if 'playlist_count' in test_case: if 'playlist_count' in test_case:
self.assertEqual( self.assertEqual(
len(res_dict['entries']), len(res_dict['entries']),

View File

@ -1598,6 +1598,7 @@ from .wsj import (
WSJArticleIE, WSJArticleIE,
) )
from .wwe import WWEIE from .wwe import WWEIE
from .w24at import W24atIE
from .xbef import XBefIE from .xbef import XBefIE
from .xboxclips import XboxClipsIE from .xboxclips import XboxClipsIE
from .xfileshare import XFileShareIE from .xfileshare import XFileShareIE
@ -1653,7 +1654,15 @@ from .younow import (
YouNowChannelIE, YouNowChannelIE,
YouNowMomentIE, YouNowMomentIE,
) )
from .youporn import YouPornIE from .youporn import (
YouPornIE,
YouPornCategoryIE,
YouPornChannelIE,
YouPornCollectionIE,
YouPornStarIE,
YouPornTagIE,
YouPornVideosIE,
)
from .yourporn import YourPornIE from .yourporn import YourPornIE
from .yourupload import YourUploadIE from .yourupload import YourUploadIE
from .youtube import ( from .youtube import (

View File

@ -0,0 +1,45 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class W24atIE(InfoExtractor):
_VALID_URL = r'https://(?:www\.)?w24\.at/Video/.*/(?P<id>[0-9]+)'
_TEST = {
'url': 'https://www.w24.at/Video/Bewegung-macht-Spass-Folge-62-Kids-6/24828',
'md5': '16e6f1c5d4a0d54d420e3d9d122660a1',
'info_dict': {
'id': '24828',
'ext': 'mp4',
'title': 'Bewegung macht Spaß! - Folge 62: Kids 6',
'description': 'Stefans Ziel ist es Kindern auch hinter den Bildschirmen zur Bewegung und zum Denksport zu animieren und das ganze mit Spaß und Köpfchen zu verbinden.',
'thumbnail': r're:.*\.jpg$',
'uploader': 'W24'
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
media_server = self._html_search_regex(r'var\s*mediaServer\s*=\s*\{.*"vod"\s*:\s*"([^"]+)"',
webpage, "vod", "ms02.w24.at")
mp4_path = self._html_search_regex(r"src:.*\+ '([^']+)'.*type:'video/mp4'",
webpage, "mp4_path")
m3u8_path = self._html_search_regex(r"src:.*\+ '([^']+)'.*type:'application/x-mpegURL'",
webpage, "m3u8_path")
formats = []
if mp4_path:
formats.append({'url': "https://%s%s" % (media_server, mp4_path), 'ext': 'mp4'})
formats.extend(self._extract_m3u8_formats("https://%s%s" % (media_server, m3u8_path), video_id, ext='mp4'))
self._sort_formats(formats)
return {
'id': video_id,
'title': re.sub(r'\s+-\sW24\s*$', '', self._og_search_title(webpage)),
'description': self._og_search_description(webpage),
'formats': formats,
'thumbnail': self._og_search_thumbnail(webpage),
'uploader': self._og_search_property('site_name', webpage, fatal=False),
}

View File

@ -1,20 +1,38 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import itertools
import re import re
from time import sleep
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html,
extract_attributes, extract_attributes,
ExtractorError,
get_element_by_class,
get_element_by_id,
int_or_none, int_or_none,
str_to_int, merge_dicts,
parse_count,
parse_qs,
T,
traverse_obj,
unified_strdate, unified_strdate,
url_or_none, url_or_none,
urljoin,
) )
class YouPornIE(InfoExtractor): class YouPornIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?' _VALID_URL = (
r'youporn:(?P<id>\d+)',
r'''(?x)
https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)
(?:/(?:(?P<display_id>[^/?#&]+)/?)?)?(?:[#?]|$)
'''
)
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)']
_TESTS = [{ _TESTS = [{
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
'md5': '3744d24c50438cf5b6f6d59feb5055c2', 'md5': '3744d24c50438cf5b6f6d59feb5055c2',
@ -34,7 +52,7 @@ class YouPornIE(InfoExtractor):
'tags': list, 'tags': list,
'age_limit': 18, 'age_limit': 18,
}, },
'skip': 'This video has been disabled', 'skip': 'This video has been deactivated',
}, { }, {
# Unknown uploader # Unknown uploader
'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4', 'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
@ -66,57 +84,104 @@ class YouPornIE(InfoExtractor):
}, { }, {
'url': 'https://www.youporn.com/watch/13922959/femdom-principal/', 'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.youporn.com/watch/16290308/tinderspecial-trailer1/',
'info_dict': {
'id': '16290308',
'age_limit': 18,
'categories': [],
'description': None, # SEO spam using title removed
'display_id': 'tinderspecial-trailer1',
'duration': 298.0,
'ext': 'mp4',
'upload_date': '20201123',
'uploader': 'Ersties',
'tags': [],
'thumbnail': 'https://fi1.ypncdn.com/m=eaSaaTbWx/202011/23/16290308/original/3.jpg',
'timestamp': 1606147564,
'title': 'Tinder In Real Life',
'view_count': int,
}
}] }]
@staticmethod @classmethod
def _extract_urls(webpage): def _extract_urls(cls, webpage):
return re.findall( def yield_urls():
r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)', for p in cls._EMBED_REGEX:
webpage) for m in re.finditer(p, webpage):
yield m.group('url')
return list(yield_urls())
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) # A different video ID (data-video-id) is hidden in the page but
video_id = mobj.group('id') # never seems to be used
display_id = mobj.group('display_id') or video_id video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
url = 'http://www.youporn.com/watch/%s' % (video_id,)
webpage = self._download_webpage(
url, video_id, headers={'Cookie': 'age_verified=1'})
definitions = self._download_json( watchable = self._search_regex(
'https://www.youporn.com/api/video/media_definitions/%s/' % video_id, r'''(<div\s[^>]*\bid\s*=\s*('|")?watch-container(?(2)\2|(?!-)\b)[^>]*>)''',
display_id) webpage, 'watchability', default=None)
if not watchable:
msg = re.split(r'\s{4}', clean_html(get_element_by_id(
'mainContent', webpage)) or '')[0]
raise ExtractorError(
('%s says: %s' % (self.IE_NAME, msg))
if msg else 'Video unavailable: no reason found',
expected=True)
# internal ID ?
# video_id = extract_attributes(watchable).get('data-video-id')
playervars = self._search_json(
r'\bplayervars\s*:', webpage, 'playervars', video_id)
def get_fmt(x):
v_url = url_or_none(x.get('videoUrl'))
if v_url:
x['videoUrl'] = v_url
return (x['format'], x)
defs_by_format = dict(traverse_obj(playervars, (
'mediaDefinitions', lambda _, v: v.get('format'), T(get_fmt))))
def get_format_data(f):
if f not in defs_by_format:
return []
return self._download_json(
defs_by_format[f]['videoUrl'], video_id, '{0}-formats'.format(f))
formats = [] formats = []
for definition in definitions: # Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
if not isinstance(definition, dict): for hls_url in traverse_obj(
continue get_format_data('hls'),
video_url = url_or_none(definition.get('videoUrl')) (lambda _, v: not isinstance(v['defaultQuality'], bool), 'videoUrl'),
if not video_url: (Ellipsis, 'videoUrl')):
continue formats.extend(self._extract_m3u8_formats(
f = { hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls',
'url': video_url, entry_protocol='m3u8_native'))
'filesize': int_or_none(definition.get('videoSize')),
} for f in traverse_obj(get_format_data('mp4'), (
height = int_or_none(definition.get('quality')) lambda _, v: v.get('videoUrl'), {
'url': ('videoUrl', T(url_or_none)),
'filesize': ('videoSize', T(int_or_none)),
'height': ('quality', T(int_or_none)),
}, T(lambda x: x.get('videoUrl') and x))):
# Video URL's path looks like this: # Video URL's path looks like this:
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 # /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 # /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4 # /videos/201703/11/109285532/1080P_4000K_109285532.mp4
# We will benefit from it by extracting some metadata # We will benefit from it by extracting some metadata
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url) mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', f['videoUrl'])
if mobj: if mobj:
if not height: if not f.get('height'):
height = int(mobj.group('height')) f['height'] = int(mobj.group('height'))
bitrate = int(mobj.group('bitrate')) f['tbr'] = int(mobj.group('bitrate'))
f.update({ f['format_id'] = '%dp-%dk' % (f['height'], f['tbr'])
'format_id': '%dp-%dk' % (height, bitrate),
'tbr': bitrate,
})
f['height'] = height
formats.append(f) formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
webpage = self._download_webpage(
'http://www.youporn.com/watch/%s' % video_id, display_id,
headers={'Cookie': 'age_verified=1'})
title = self._html_search_regex( title = self._html_search_regex(
r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>', r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
webpage, 'title', default=None) or self._og_search_title( webpage, 'title', default=None) or self._og_search_title(
@ -131,8 +196,10 @@ class YouPornIE(InfoExtractor):
thumbnail = self._search_regex( thumbnail = self._search_regex(
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1', r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
webpage, 'thumbnail', fatal=False, group='thumbnail') webpage, 'thumbnail', fatal=False, group='thumbnail')
duration = int_or_none(self._html_search_meta( duration = traverse_obj(playervars, ('duration', T(int_or_none)))
'video:duration', webpage, 'duration', fatal=False)) if duration is None:
duration = int_or_none(self._html_search_meta(
'video:duration', webpage, 'duration', fatal=False))
uploader = self._html_search_regex( uploader = self._html_search_regex(
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>', r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
@ -148,11 +215,11 @@ class YouPornIE(InfoExtractor):
view_count = None view_count = None
views = self._search_regex( views = self._search_regex(
r'(<div[^>]+\bclass=["\']js_videoInfoViews["\']>)', webpage, r'(<div\s[^>]*\bdata-value\s*=[^>]+>)\s*<label>Views:</label>',
'views', default=None) webpage, 'views', default=None)
if views: if views:
view_count = str_to_int(extract_attributes(views).get('data-value')) view_count = parse_count(extract_attributes(views).get('data-value'))
comment_count = str_to_int(self._search_regex( comment_count = parse_count(self._search_regex(
r'>All [Cc]omments? \(([\d,.]+)\)', r'>All [Cc]omments? \(([\d,.]+)\)',
webpage, 'comment count', default=None)) webpage, 'comment count', default=None))
@ -168,7 +235,10 @@ class YouPornIE(InfoExtractor):
r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>', r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>',
'tags') 'tags')
return { data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False) or {}
data.pop('url', None)
result = merge_dicts(data, {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
@ -183,4 +253,442 @@ class YouPornIE(InfoExtractor):
'tags': tags, 'tags': tags,
'age_limit': age_limit, 'age_limit': age_limit,
'formats': formats, 'formats': formats,
} })
# Remove promotional non-description
if result.get('description', '').startswith(
'Watch %s online' % (result['title'],)):
del result['description']
return result
class YouPornListBase(InfoExtractor):
# pattern in '.title-text' element of page section containing videos
_PLAYLIST_TITLEBAR_RE = r'\s+[Vv]ideos\s*$'
_PAGE_RETRY_COUNT = 0 # ie, no retry
_PAGE_RETRY_DELAY = 2 # seconds
def _get_next_url(self, url, pl_id, html):
return urljoin(url, self._search_regex(
r'''<a\s[^>]*?\bhref\s*=\s*("|')(?P<url>(?:(?!\1)[^>])+)\1''',
get_element_by_id('next', html) or '', 'next page',
group='url', default=None))
@classmethod
def _get_title_from_slug(cls, title_slug):
return re.sub(r'[_-]', ' ', title_slug)
def _entries(self, url, pl_id, html=None, page_num=None):
# separates page sections
PLAYLIST_SECTION_RE = (
r'''<div\s[^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?title-bar(?:\s+[\w$-]+|\s)*\1[^>]*>'''
)
# contains video link
VIDEO_URL_RE = r'''(?x)
<div\s[^>]*\bdata-video-id\s*=\s*('|")\d+\1[^>]*>\s*
(?:<div\b[\s\S]+?</div>\s*)*
<a\s[^>]*\bhref\s*=\s*('|")(?P<url>(?:(?!\2)[^>])+)\2
'''
def yield_pages(url, html=html, page_num=page_num):
fatal = not html
for pnum in itertools.count(start=page_num or 1):
if not html:
html = self._download_webpage(
url, pl_id, note='Downloading page %d' % pnum,
fatal=fatal)
if not html:
break
fatal = False
yield (url, html, pnum)
# explicit page: extract just that page
if page_num is not None:
break
next_url = self._get_next_url(url, pl_id, html)
if not next_url or next_url == url:
break
url, html = next_url, None
def retry_page(msg, tries_left, page_data):
if tries_left <= 0:
return
self.report_warning(msg, pl_id)
sleep(self._PAGE_RETRY_DELAY)
return next(
yield_pages(page_data[0], page_num=page_data[2]), None)
def yield_entries(html):
for frag in re.split(PLAYLIST_SECTION_RE, html):
if not frag:
continue
t_text = get_element_by_class('title-text', frag or '')
if not (t_text and re.search(self._PLAYLIST_TITLEBAR_RE, t_text)):
continue
for m in re.finditer(VIDEO_URL_RE, frag):
video_url = urljoin(url, m.group('url'))
if video_url:
yield self.url_result(video_url)
last_first_url = None
for page_data in yield_pages(url, html=html, page_num=page_num):
# page_data: url, html, page_num
first_url = None
tries_left = self._PAGE_RETRY_COUNT + 1
while tries_left > 0:
tries_left -= 1
for from_ in yield_entries(page_data[1]):
# may get the same page twice instead of empty page
# or (site bug) intead of actual next page
if not first_url:
first_url = from_['url']
if first_url == last_first_url:
# sometimes (/porntags/) the site serves the previous page
# instead but may provide the correct page after a delay
page_data = retry_page(
'Retrying duplicate page...', tries_left, page_data)
if page_data:
first_url = None
break
continue
yield from_
else:
if not first_url and 'no-result-paragarph1' in page_data[1]:
page_data = retry_page(
'Retrying empty page...', tries_left, page_data)
if page_data:
continue
else:
# success/failure
break
# may get an infinite (?) sequence of empty pages
if not first_url:
break
last_first_url = first_url
def _real_extract(self, url, html=None):
# exceptionally, id may be None
m_dict = self._match_valid_url(url).groupdict()
pl_id, page_type, sort = (m_dict.get(k) for k in ('id', 'type', 'sort'))
qs = parse_qs(url)
for q, v in qs.items():
if v:
qs[q] = v[-1]
else:
del qs[q]
base_id = pl_id or 'YouPorn'
title = self._get_title_from_slug(base_id)
if page_type:
title = '%s %s' % (page_type.capitalize(), title)
base_id = [base_id.lower()]
if sort is None:
title += ' videos'
else:
title = '%s videos by %s' % (title, re.sub(r'[_-]', ' ', sort))
base_id.append(sort)
if qs:
ps = ['%s=%s' % item for item in sorted(qs.items())]
title += ' (%s)' % ','.join(ps)
base_id.extend(ps)
pl_id = '/'.join(base_id)
return self.playlist_result(
self._entries(url, pl_id, html=html,
page_num=int_or_none(qs.get('page'))),
playlist_id=pl_id, playlist_title=title)
class YouPornCategoryIE(YouPornListBase):
IE_DESC = 'YouPorn category, with sorting, filtering and pagination'
_VALID_URL = r'''(?x)
https?://(?:www\.)?youporn\.com/
(?P<type>category)/(?P<id>[^/?#&]+)
(?:/(?P<sort>popular|views|rating|time|duration))?/?(?:[#?]|$)
'''
_TESTS = [{
'note': 'Full list with pagination',
'url': 'https://www.youporn.com/category/lingerie/popular/',
'info_dict': {
'id': 'lingerie/popular',
'title': 'Category lingerie videos by popular',
},
'playlist_mincount': 39,
}, {
'note': 'Filtered paginated list with single page result',
'url': 'https://www.youporn.com/category/lingerie/duration/?min_minutes=10',
'info_dict': {
'id': 'lingerie/duration/min_minutes=10',
'title': 'Category lingerie videos by duration (min_minutes=10)',
},
'playlist_maxcount': 30,
}, {
'note': 'Single page of full list',
'url': 'https://www.youporn.com/category/lingerie/popular?page=1',
'info_dict': {
'id': 'lingerie/popular/page=1',
'title': 'Category lingerie videos by popular (page=1)',
},
'playlist_count': 30,
}]
class YouPornChannelIE(YouPornListBase):
IE_DESC = 'YouPorn channel, with sorting and pagination'
_VALID_URL = r'''(?x)
https?://(?:www\.)?youporn\.com/
(?P<type>channel)/(?P<id>[^/?#&]+)
(?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$)
'''
_TESTS = [{
'note': 'Full list with pagination',
'url': 'https://www.youporn.com/channel/x-feeds/',
'info_dict': {
'id': 'x-feeds',
'title': 'Channel X-Feeds videos',
},
'playlist_mincount': 37,
}, {
'note': 'Single page of full list (no filters here)',
'url': 'https://www.youporn.com/channel/x-feeds/duration?page=1',
'info_dict': {
'id': 'x-feeds/duration/page=1',
'title': 'Channel X-Feeds videos by duration (page=1)',
},
'playlist_count': 24,
}]
@staticmethod
def _get_title_from_slug(title_slug):
return re.sub(r'_', ' ', title_slug).title()
class YouPornCollectionIE(YouPornListBase):
IE_DESC = 'YouPorn collection (user playlist), with sorting and pagination'
_VALID_URL = r'''(?x)
https?://(?:www\.)?youporn\.com/
(?P<type>collection)s/videos/(?P<id>\d+)
(?:/(?P<sort>rating|views|time|duration))?/?(?:[#?]|$)
'''
_PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+in\s'
_TESTS = [{
'note': 'Full list with pagination',
'url': 'https://www.youporn.com/collections/videos/33044251/',
'info_dict': {
'id': '33044251',
'title': 'Collection Sexy Lips videos',
'uploader': 'ph-littlewillyb',
},
'playlist_mincount': 50,
}, {
'note': 'Single page of full list (no filters here)',
'url': 'https://www.youporn.com/collections/videos/33044251/time?page=1',
'info_dict': {
'id': '33044251/time/page=1',
'title': 'Collection Sexy Lips videos by time (page=1)',
'uploader': 'ph-littlewillyb',
},
'playlist_count': 20,
}]
def _real_extract(self, url):
pl_id = self._match_id(url)
html = self._download_webpage(url, pl_id)
playlist = super(YouPornCollectionIE, self)._real_extract(url, html=html)
infos = re.sub(r'\s+', ' ', clean_html(get_element_by_class(
'collection-infos', html)) or '')
title, uploader = self._search_regex(
r'^\s*Collection: (?P<title>.+?) \d+ VIDEOS \d+ VIEWS \d+ days LAST UPDATED From: (?P<uploader>[\w_-]+)',
infos, 'title/uploader', group=('title', 'uploader'), default=(None, None))
return merge_dicts({
'title': playlist['title'].replace(playlist['id'].split('/')[0], title),
'uploader': uploader,
}, playlist) if title else playlist
class YouPornTagIE(YouPornListBase):
IE_DESC = 'YouPorn tag (porntags), with sorting, filtering and pagination'
_VALID_URL = r'''(?x)
https?://(?:www\.)?youporn\.com/
porn(?P<type>tag)s/(?P<id>[^/?#&]+)
(?:/(?P<sort>views|rating|time|duration))?/?(?:[#?]|$)
'''
_PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+tagged\s'
_PAGE_RETRY_COUNT = 1
_TESTS = [{
'note': 'Full list with pagination',
'url': 'https://www.youporn.com/porntags/austrian',
'info_dict': {
'id': 'austrian',
'title': 'Tag austrian videos',
},
'playlist_mincount': 35,
'expected_warnings': ['Retrying duplicate page'],
}, {
'note': 'Filtered paginated list with single page result',
'url': 'https://www.youporn.com/porntags/austrian/duration/?min_minutes=10',
'info_dict': {
'id': 'austrian/duration/min_minutes=10',
'title': 'Tag austrian videos by duration (min_minutes=10)',
},
# number of videos per page is (row x col) 2x3 + 6x4 + 2, or + 3,
# or more, varying with number of ads; let's set max as 9x4
# NB col 1 may not be shown in non-JS page with site CSS and zoom 100%
'playlist_maxcount': 32,
'expected_warnings': ['Retrying duplicate page', 'Retrying empty page'],
}, {
'note': 'Single page of full list',
'url': 'https://www.youporn.com/porntags/austrian/?page=1',
'info_dict': {
'id': 'austrian/page=1',
'title': 'Tag austrian videos (page=1)',
},
'playlist_mincount': 32,
'playlist_maxcount': 34,
'expected_warnings': ['Retrying duplicate page', 'Retrying empty page'],
}]
# YP tag navigation is broken, loses sort
def _get_next_url(self, url, pl_id, html):
next_url = super(YouPornTagIE, self)._get_next_url(url, pl_id, html)
if next_url:
n = self._match_valid_url(next_url)
if n:
s = n.groupdict().get('sort')
if s:
u = self._match_valid_url(url)
if u:
u = u.groupdict().get('sort')
if s and not u:
n = n.end('sort')
next_url = next_url[:n] + '/' + u + next_url[n:]
return next_url
class YouPornStarIE(YouPornListBase):
IE_DESC = 'YouPorn Pornstar, with description, sorting and pagination'
_VALID_URL = r'''(?x)
https?://(?:www\.)?youporn\.com/
(?P<type>pornstar)/(?P<id>[^/?#&]+)
(?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$)
'''
_PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+[fF]eaturing\s'
_TESTS = [{
'note': 'Full list with pagination',
'url': 'https://www.youporn.com/pornstar/daynia/',
'info_dict': {
'id': 'daynia',
'title': 'Pornstar Daynia videos',
'description': r're:Daynia Rank \d+ Videos \d+ Views [\d,.]+ .+ Subscribers \d+',
},
'playlist_mincount': 45,
}, {
'note': 'Single page of full list (no filters here)',
'url': 'https://www.youporn.com/pornstar/daynia/?page=1',
'info_dict': {
'id': 'daynia/page=1',
'title': 'Pornstar Daynia videos (page=1)',
'description': 're:.{180,}',
},
'playlist_count': 26,
}]
@staticmethod
def _get_title_from_slug(title_slug):
return re.sub(r'_', ' ', title_slug).title()
def _real_extract(self, url):
pl_id = self._match_id(url)
html = self._download_webpage(url, pl_id)
playlist = super(YouPornStarIE, self)._real_extract(url, html=html)
INFO_ELEMENT_RE = r'''(?x)
<div\s[^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?pornstar-info-wrapper(?:\s+[\w$-]+|\s)*\1[^>]*>
(?P<info>[\s\S]+?)(?:</div>\s*){6,}
'''
infos = self._search_regex(INFO_ELEMENT_RE, html, 'infos', group='info', default='')
if infos:
infos = re.sub(
r'(?:\s*nl=nl)+\s*', ' ',
re.sub(r'(?u)\s+', ' ', clean_html(
re.sub('\n', 'nl=nl', infos)))).replace('ribe Subsc', '')
return merge_dicts({
'description': infos.strip() or None,
}, playlist)
class YouPornVideosIE(YouPornListBase):
IE_DESC = 'YouPorn video (browse) playlists, with sorting, filtering and pagination'
_VALID_URL = r'''(?x)
https?://(?:www\.)?youporn\.com/
(?:(?P<id>browse)/)?
(?P<sort>(?(id)
(?:duration|rating|time|views)|
(?:most_(?:favou?rit|view)ed|recommended|top_rated)?))
(?:[/#?]|$)
'''
_PLAYLIST_TITLEBAR_RE = r'\s+(?:[Vv]ideos|VIDEOS)\s*$'
_TESTS = [{
'note': 'Full list with pagination (too long for test)',
'url': 'https://www.youporn.com/',
'info_dict': {
'id': 'youporn',
'title': 'YouPorn videos',
},
'only_matching': True,
}, {
'note': 'Full list with pagination (too long for test)',
'url': 'https://www.youporn.com/recommended',
'info_dict': {
'id': 'youporn/recommended',
'title': 'YouPorn videos by recommended',
},
'only_matching': True,
}, {
'note': 'Full list with pagination (too long for test)',
'url': 'https://www.youporn.com/top_rated',
'info_dict': {
'id': 'youporn/top_rated',
'title': 'YouPorn videos by top rated',
},
'only_matching': True,
}, {
'note': 'Full list with pagination (too long for test)',
'url': 'https://www.youporn.com/browse/time',
'info_dict': {
'id': 'browse/time',
'title': 'YouPorn videos by time',
},
'only_matching': True,
}, {
'note': 'Filtered paginated list with single page result',
'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=2',
'info_dict': {
'id': 'youporn/most_favorited/max_minutes=2/res=VR',
'title': 'YouPorn videos by most favorited (max_minutes=2,res=VR)',
},
'playlist_mincount': 10,
'playlist_maxcount': 28,
}, {
'note': 'Filtered paginated list with several pages',
'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=5',
'info_dict': {
'id': 'youporn/most_favorited/max_minutes=5/res=VR',
'title': 'YouPorn videos by most favorited (max_minutes=5,res=VR)',
},
'playlist_mincount': 45,
}, {
'note': 'Single page of full list',
'url': 'https://www.youporn.com/browse/time?page=1',
'info_dict': {
'id': 'browse/time/page=1',
'title': 'YouPorn videos by time (page=1)',
},
'playlist_count': 36,
}]
@staticmethod
def _get_title_from_slug(title_slug):
return 'YouPorn' if title_slug == 'browse' else title_slug

View File

@ -74,8 +74,11 @@ class FFmpegPostProcessor(PostProcessor):
return FFmpegPostProcessor(downloader)._versions return FFmpegPostProcessor(downloader)._versions
def _determine_executables(self): def _determine_executables(self):
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] # ordered to match prefer_ffmpeg!
convs = ['ffmpeg', 'avconv']
probes = ['ffprobe', 'avprobe']
prefer_ffmpeg = True prefer_ffmpeg = True
programs = convs + probes
def get_ffmpeg_version(path): def get_ffmpeg_version(path):
ver = get_exe_version(path, args=['-version']) ver = get_exe_version(path, args=['-version'])
@ -127,10 +130,13 @@ class FFmpegPostProcessor(PostProcessor):
(p, get_ffmpeg_version(self._paths[p])) for p in programs) (p, get_ffmpeg_version(self._paths[p])) for p in programs)
if x[1] is not None) if x[1] is not None)
for p in ('ffmpeg', 'avconv')[::-1 if prefer_ffmpeg is False else 1]: basenames = [None, None]
if self._versions.get(p): for i, progs in enumerate((convs, probes)):
self.basename = self.probe_basename = p for p in progs[::-1 if prefer_ffmpeg is False else 1]:
break if self._versions.get(p):
basenames[i] = p
break
self.basename, self.probe_basename = basenames
@property @property
def available(self): def available(self):

View File

@ -2371,15 +2371,24 @@ def make_HTTPS_handler(params, **kwargs):
return YoutubeDLHTTPSHandler(params, context=context, **kwargs) return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
def bug_reports_message(): def bug_reports_message(before=';'):
if ytdl_is_updateable(): if ytdl_is_updateable():
update_cmd = 'type youtube-dl -U to update' update_cmd = 'type youtube-dl -U to update'
else: else:
update_cmd = 'see https://yt-dl.org/update on how to update' update_cmd = 'see https://github.com/ytdl-org/youtube-dl/#user-content-installation on how to update'
msg = '; please report this issue on https://yt-dl.org/bug .'
msg += ' Make sure you are using the latest version; %s.' % update_cmd msg = (
msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' 'please report this issue on https://github.com/ytdl-org/youtube-dl/issues ,'
return msg ' using the appropriate issue template.'
' Make sure you are using the latest version; %s.'
' Be sure to call youtube-dl with the --verbose option and include the complete output.'
) % update_cmd
before = (before or '').rstrip()
if not before or before.endswith(('.', '!', '?')):
msg = msg[0].title() + msg[1:]
return (before + ' ' if before else '') + msg
class YoutubeDLError(Exception): class YoutubeDLError(Exception):