1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2025-02-21 17:39:50 +00:00
youtube-dl/youtube_dl/extractor/aznude.py

128 lines
4.8 KiB
Python
Raw Normal View History

2017-07-12 00:45:29 +00:00
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
2017-07-28 00:48:22 +00:00
from ..utils import urljoin
2017-07-12 00:45:29 +00:00
try:
from urlparse import urlparse
except ImportError:
from urllib.parse import urlparse
import re
class AZNudeIE(InfoExtractor):
    """Extractor for a single AZNude clip page (``/mrskin/`` or ``/azncdn/`` URLs)."""

    IE_NAME = 'aznude'
    _VALID_URL = r'https?://(?:www\.)?aznude\.com/(?:mrskin|azncdn)/[^/?]+/[^/?]+/(?P<id>.*)\.html'
    _TEST = {
        'url': 'https://www.aznude.com/mrskin/marisatomei/loiteringwithintent/loiteringwithintent-mcnallytomei-hd-01-hd.html',
        'md5': '28973bf7b818edfe55677b67bc073e40',
        'info_dict': {
            'id': 'loiteringwithintent-mcnallytomei-hd-01-hd',
            'ext': 'mp4',
            'title': 'Marisa Tomei in Loitering With Intent - 01',
            'description': 'Watch Marisa Tomei\'s Breasts scene on AZNude for free (22 seconds).',
        },
    }

    # Media file-name suffix (the part right before ".<ext>") mapped to the
    # metadata attached to a format whose URL ends with that suffix.
    _FORMAT_VARIANTS = (
        ('-lo', {
            'format': 'Low Quality',
            'format_id': 'LQ',
            'quality': 1,
            'width': 640,
            'height': 360,
            'format_note': '360p video with mono audio',
        }),
        ('-hi', {
            'format': 'High Quality',
            'format_id': 'HQ',
            'quality': 2,
            'width': 640,
            'height': 360,
            'format_note': '360p video with stereo audio',
        }),
        ('-hd', {
            'format': 'High Definition',
            'format_id': 'HD',
            'quality': 3,
            'width': 1280,
            'height': 720,
            'format_note': '720p video with stereo audio',
        }),
    )

    def _annotate_formats(self, parsed_formats):
        """Return the recognised formats from *parsed_formats*, annotated.

        A format is recognised when its URL ends with one of the suffixes in
        ``_FORMAT_VARIANTS`` followed by ``.<ext>``; the matching metadata is
        merged into the format dict. Unrecognised formats are dropped.
        """
        # Build a new list rather than calling remove() on the list being
        # iterated: removing during iteration skips the element that follows
        # each removed one, so unknown formats could slip through.
        formats = []
        for fmt in parsed_formats:
            fmt_url = fmt['url']
            for suffix, variant in self._FORMAT_VARIANTS:
                if fmt_url.endswith(suffix + '.' + fmt['ext']):
                    fmt.update(variant)
                    formats.append(fmt)
                    break
        return formats

    def _real_extract(self, url):
        """Download the clip page at *url* and return its info dict.

        The title is "<artist> in <work>" when both links are found on the
        page, otherwise the Open Graph title; any episode/sequence numbers
        found in the video id are appended as " - <numbers>".
        """
        video_id = self._match_id(url)
        # Collect tokens such as "s01e02", "1x02" or plain digit runs from the id.
        numeric_id = '-'.join(re.findall(r'(?P<num>(?:s\d+e\d+)|(?:\d+[xX]\d+)|(?:\d+))', video_id))

        webpage = self._download_webpage(url, video_id)
        jwplayer_data = self._find_jwplayer_data(webpage)
        parsed_formats = self._parse_jwplayer_data(
            jwplayer_data, video_id, require_title=False)['formats']

        formats = self._annotate_formats(parsed_formats)
        # 'quality' was only assigned above, so the list has to be ordered
        # again for the worst-to-best convention to hold.
        self._sort_formats(formats)

        artist = self._html_search_regex(
            r'(?P<artist><span><a href="/view/celeb/[^/?]/[^/?]+\.html">[^<]+</a></span>)',
            webpage, 'artist', default=None)
        work = self._html_search_regex(
            r'in (?P<work><a href="/view/movie/[^/?]/[^/?]+\.html">[^<]+</a>)',
            webpage, 'work', default=None)

        if artist is not None and work is not None:
            title = artist + ' in ' + work
        else:
            title = self._og_search_title(webpage)
        if numeric_id:
            title = title + ' - ' + numeric_id

        return {
            'id': video_id,
            'title': title,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
        }
class AZNudeCollectionIE(InfoExtractor):
    """Extractor for AZNude listing pages (celebrity, movie and browse pages).

    Every clip link found on the page becomes one playlist entry that is
    delegated to ``AZNudeIE``.
    """

    IE_NAME = 'aznude:collection'
    _VALID_URL = r'https?://(?:www\.)?aznude\.com/(?:view/[^/]+/[^/]+|browse/(?:videos|tags/vids))/(?P<id>.+)\.html'
    # The expected ids are what _VALID_URL's "id" group captures: the last
    # path component without the ".html" extension.
    _TESTS = [{
        'url': 'http://www.aznude.com/view/celeb/m/marisatomei.html',
        'info_dict': {
            'title': 'Marisa Tomei Nude - Aznude ',
            'id': 'marisatomei',
        },
        'playlist_mincount': 33,
    }, {
        'url': 'https://www.aznude.com/view/movie/l/loiteringwithintent.html',
        'info_dict': {
            'title': 'Loitering With Intent Nude Scenes - Aznude',
            'id': 'loiteringwithintent',
        },
        'playlist_mincount': 2,
    }]

    def _real_extract(self, url):
        """Download the listing page at *url* and return a playlist result.

        The playlist title is the page's ``<title>`` text in title case, or
        ``None`` when the page has no ``<title>`` element.
        """
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        title = self._search_regex(
            r'(?:<title>)(?P<thetitle>.+)(?:</title>)', webpage, 'title', default=None)
        # The lookup is non-fatal (default=None), so only title-case the
        # string when one was actually found instead of calling a method on None.
        if title is not None:
            title = title.title()

        parse_result = urlparse(url)
        url_prefix = parse_result.scheme + '://' + parse_result.netloc

        clip_paths = re.findall(
            r'(?:<a[^>]+href=")(?P<url>[^"]+)(?:"[^>]+class="(?:[^"]+ )?show-clip(?:"| [^"]+")[^>]*>)',
            webpage)
        # Protocol-relative links ("//host/...") are skipped; every other
        # link is resolved against this page's scheme and host.
        entries = [
            self.url_result(urljoin(url_prefix, path), AZNudeIE.ie_key())
            for path in clip_paths
            if not path.startswith('//')]

        return self.playlist_result(entries, page_id, title)