diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3da5f8020..32e3e4a39 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -494,6 +494,11 @@ from .hotstar import ( ) from .howcast import HowcastIE from .howstuffworks import HowStuffWorksIE +from .hqporner import ( + HQPornerIE, + HQPornerListIE, + HQPornerSearchIE, +) from .hrfernsehen import HRFernsehenIE from .hrti import ( HRTiIE, diff --git a/youtube_dl/extractor/hqporner.py b/youtube_dl/extractor/hqporner.py new file mode 100644 index 000000000..ef8f1fab2 --- /dev/null +++ b/youtube_dl/extractor/hqporner.py @@ -0,0 +1,233 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import itertools +import re +from string import capwords + +from .common import InfoExtractor +from ..utils import ( + clean_html, + date_from_str, + get_element_by_class, + get_element_by_id, + int_or_none, + join_nonempty, + parse_duration, + strip_or_none, + unified_strdate, + urljoin, +) + + +class HQPornerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?hqporner\.com/hdporn/(?P[\d]+)-' + _TESTS = [{ + 'url': 'https://hqporner.com/hdporn/110374-looking_for_a_change_of_pace.html', + 'md5': '7eb7b791a1ce8a619bde603b2dc334b5', + 'info_dict': { + 'id': '110374', + 'ext': 'mp4', + 'title': 'Looking For A Change Of Pace', + 'description': 'featuring Myra', + 'upload_date': '20230227', + 'age_limit': 18, + 'tags': list, + 'categories': list, + 'duration': 3271, + 'thumbnail': r're:https?://.*\.jpg', + }, + }, { + 'url': 'https://hqporner.com/hdporn/86482-all_night_rager.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + # some pages need a `referer` to avoid 404 + webpage = self._download_webpage(url, video_id, headers={'referer': 'https://hqporner.com/?q=porn'}) + + # details below video are in a
element + heading = self._search_regex(r'''(?s)(
.+?)\s*''', webpage, 'heading', default='') + title = ( + capwords(clean_html(get_element_by_class('main-h1', heading) or '')) + or self._html_search_regex( + r']*>\s*([^<]+)(?:\s+-\s+HQporner\.com)?\s*', + webpage, 'title')) + # video details are in a page loaded by this