youtube-dl/youtube_dl/extractor/aznude.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor

from ..utils import urljoin

try:
    from urlparse import urlparse
except ImportError:
    from urllib.parse import urlparse

import re


class AZNudeIE(InfoExtractor):
    """Extractor for single clip pages under ``/mrskin/`` and ``/azncdn/``.

    Formats are read from the jwplayer setup found in the page.  Each format
    is recognised by the quality suffix that precedes the file extension in
    its URL (``-lo``, ``-hi`` or ``-hd``); formats without a known suffix are
    dropped.
    """
    IE_NAME = 'aznude'
    _VALID_URL = r'https?://(?:www\.)?aznude\.com/(?:mrskin|azncdn)/[^/?]+/[^/?]+/(?P<id>.*)\.html'
    _TEST = {
        'url': 'https://www.aznude.com/mrskin/marisatomei/loiteringwithintent/loiteringwithintent-mcnallytomei-hd-01-hd.html',
        'md5': '28973bf7b818edfe55677b67bc073e40',
        'info_dict': {
            'id': 'loiteringwithintent-mcnallytomei-hd-01-hd',
            'ext': 'mp4',
            'title': 'Marisa Tomei in Loitering With Intent - 01',
            'description': 'Watch Marisa Tomei\'s Breasts scene on AZNude for free (22 seconds).',
        },
    }

    # (URL suffix placed right before the file extension, metadata merged
    # into the matching format).
    _QUALITY_SUFFIXES = (
        ('-lo.', {
            'format': 'Low Quality',
            'format_id': 'LQ',
            'quality': 1,
            'width': 640,
            'height': 360,
            'format_note': '360p video with mono audio',
        }),
        ('-hi.', {
            'format': 'High Quality',
            'format_id': 'HQ',
            'quality': 2,
            'width': 640,
            'height': 360,
            'format_note': '360p video with stereo audio',
        }),
        ('-hd.', {
            'format': 'High Definition',
            'format_id': 'HD',
            'quality': 3,
            'width': 1280,
            'height': 720,
            'format_note': '720p video with stereo audio',
        }),
    )

    def _real_extract(self, url):
        """Return the info dict (id, title, description, thumbnail, formats)
        for the clip page at ``url``."""
        video_id = self._match_id(url)
        # Episode-like ("s01e02", "1x02") and plain numeric parts of the id
        # are appended to the title further below.
        numeric_id = '-'.join(re.findall(
            r'(?P<num>(?:s\d+e\d+)|(?:\d+[xX]\d+)|(?:\d+))', video_id))
        webpage = self._download_webpage(url, video_id)

        jwplayer_data = self._find_jwplayer_data(webpage)
        parsed_formats = self._parse_jwplayer_data(
            jwplayer_data, video_id, require_title=False)['formats']

        # Build a new list rather than removing items from the list being
        # iterated: removing in place makes the loop skip the element that
        # follows each removed one.
        formats = []
        for fmt in parsed_formats:
            format_url = fmt['url']
            for suffix, metadata in self._QUALITY_SUFFIXES:
                if format_url.endswith(suffix + fmt['ext']):
                    fmt.update(metadata)
                    formats.append(fmt)
                    break
            # No suffix matched: unknown format, intentionally left out.

        artist = self._html_search_regex(
            r'(?P<artist><span><a href="/view/celeb/[^/?]/[^/?]+\.html">[^<]+</a></span>)',
            webpage, 'artist', default=None)
        work = self._html_search_regex(
            r'in (?P<work><a href="/view/movie/[^/?]/[^/?]+\.html">[^<]+</a>)',
            webpage, 'work', default=None)

        if artist is not None and work is not None:
            title = artist + ' in ' + work
        else:
            # Page markup did not yield both parts; use the og:title instead.
            title = self._og_search_title(webpage)

        if numeric_id:
            title = title + ' - ' + numeric_id

        return {
            'id': video_id,
            'title': title,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
        }


class AZNudeCollectionIE(InfoExtractor):
    """Extractor for listing pages (``/view/...`` and ``/browse/...``).

    Every anchor carrying the ``show-clip`` class is turned into a playlist
    entry that is delegated to ``AZNudeIE``.
    """
    IE_NAME = 'aznude:collection'
    _VALID_URL = r'https?://(?:www\.)?aznude\.com/(?:view/[^/]+/[^/]+|browse/(?:videos|tags/vids))/(?P<id>.+)\.html'
    # NOTE: the ``id`` group of _VALID_URL captures only the last path
    # component without ``.html``, so that is the playlist id expected here.
    _TESTS = [{
        'url': 'http://www.aznude.com/view/celeb/m/marisatomei.html',
        'info_dict': {
            'title': 'Marisa Tomei Nude - Aznude ',
            'id': 'marisatomei',
        },
        'playlist_mincount': 33,
    }, {
        'url': 'https://www.aznude.com/view/movie/l/loiteringwithintent.html',
        'info_dict': {
            'title': 'Loitering With Intent Nude Scenes - Aznude',
            'id': 'loiteringwithintent',
        },
        'playlist_mincount': 2,
    }]

    def _real_extract(self, url):
        """Return a playlist result with one entry per clip link found on
        the page at ``url``."""
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        title = self._search_regex(
            r'(?:<title>)(?P<thetitle>.+)(?:</title>)',
            webpage, 'title', default=None)
        # The search falls back to None when the page has no <title>; only
        # title-case an actual string.
        if title is not None:
            title = title.title()

        parse_result = urlparse(url)
        url_prefix = parse_result.scheme + '://' + parse_result.netloc

        entries = []
        for path in re.findall(r'(?:<a[^>]+href=")(?P<url>[^"]+)(?:"[^>]+class="(?:[^"]+ )?show-clip(?:"| [^"]+")[^>]*>)', webpage):
            # Protocol-relative links are skipped.
            if path.startswith('//'):
                continue
            clip_url = urljoin(url_prefix, path)
            # Skip links that could not be resolved to a URL.
            if not clip_url:
                continue
            entries.append(self.url_result(clip_url, AZNudeIE.ie_key()))

        return self.playlist_result(entries, page_id, title)
[aznude] Add new extractor 2017-07-12 00:45:29 +00:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

			`from .common import InfoExtractor`

Updated aznude.py 2017-07-28 06:18:22 +05:30			`from ..utils import urljoin`

[aznude] Add new extractor 2017-07-12 00:45:29 +00:00			`try:`
			`from urlparse import urlparse`
			`except ImportError:`
			`from urllib.parse import urlparse`

			`import re`


			`class AZNudeIE(InfoExtractor):`
Updated aznude.py - Consolidated Movie and Celeb IE into a single CollectionIE - Added support for multiple formats 2017-07-18 13:12:05 +05:30			`IE_NAME = 'aznude'`
[aznude] Add new extractor 2017-07-12 00:45:29 +00:00			`_VALID_URL = r'https?://(?:www\.)?aznude\.com/(?:mrskin\|azncdn)/[^/?]+/[^/?]+/(?P<id>.*)\.html'`
			`_TEST = {`
			`'url': 'https://www.aznude.com/mrskin/marisatomei/loiteringwithintent/loiteringwithintent-mcnallytomei-hd-01-hd.html',`
			`'md5': '28973bf7b818edfe55677b67bc073e40',`
			`'info_dict': {`
			`'id': 'loiteringwithintent-mcnallytomei-hd-01-hd',`
			`'ext': 'mp4',`
			`'title': 'Marisa Tomei in Loitering With Intent - 01',`
			`'description': 'Watch Marisa Tomei\'s Breasts scene on AZNude for free (22 seconds).',`
			`},`
			`}`

			`def _real_extract(self, url):`
			`video_id = self._match_id(url)`
Updated aznude.py - Consolidated Movie and Celeb IE into a single CollectionIE - Added support for multiple formats 2017-07-18 13:12:05 +05:30			`numeric_id = '-'.join(re.findall(r'(?P<num>(?:s\d+e\d+)\|(?:\d+[xX]\d+)\|(?:\d+))', video_id))`
[aznude] Add new extractor 2017-07-12 00:45:29 +00:00			`webpage = self._download_webpage(url, video_id)`

Updated aznude.py - Consolidated Movie and Celeb IE into a single CollectionIE - Added support for multiple formats 2017-07-18 13:12:05 +05:30			`jwplayer_data = self._find_jwplayer_data(webpage)`
			`parsed_formats = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)['formats']`

			`for format in parsed_formats:`
			`url = format['url']`

			`if url.endswith('-lo.' + format['ext']):`
			`format['format'] = 'Low Quality'`
			`format['format_id'] = 'LQ'`
			`format['quality'] = 1`
Updated aznude.py 2017-07-28 06:18:22 +05:30			`format['width'] = 640`
			`format['height'] = 360`
			`format['format_note'] = '360p video with mono audio'`
Updated aznude.py - Consolidated Movie and Celeb IE into a single CollectionIE - Added support for multiple formats 2017-07-18 13:12:05 +05:30
			`elif url.endswith('-hi.' + format['ext']):`
			`format['format'] = 'High Quality'`
			`format['format_id'] = 'HQ'`
			`format['quality'] = 2`
Updated aznude.py 2017-07-28 06:18:22 +05:30			`format['width'] = 640`
			`format['height'] = 360`
			`format['format_note'] = '360p video with stereo audio'`
Updated aznude.py - Consolidated Movie and Celeb IE into a single CollectionIE - Added support for multiple formats 2017-07-18 13:12:05 +05:30
			`elif url.endswith('-hd.' + format['ext']):`
			`format['format'] = 'High Definition'`
			`format['format_id'] = 'HD'`
Updated aznude.py 2017-07-28 06:18:22 +05:30			`format['quality'] = 3`
			`format['width'] = 1280`
			`format['height'] = 720`
			`format['format_note'] = '720p video with stereo audio'`
Updated aznude.py - Consolidated Movie and Celeb IE into a single CollectionIE - Added support for multiple formats 2017-07-18 13:12:05 +05:30			`else:`
			`# Unknown format!`
			`parsed_formats.remove(format)`


			`artist = self._html_search_regex(r'(?P<artist><span><a href="/view/celeb/[^/?]/[^/?]+\.html">[^<]+</a></span>)',`
[aznude] Add new extractor 2017-07-12 00:45:29 +00:00			`webpage,`
			`url,`
			`default=None)`
Updated aznude.py - Consolidated Movie and Celeb IE into a single CollectionIE - Added support for multiple formats 2017-07-18 13:12:05 +05:30			`work = self._html_search_regex(r'in (?P<work><a href="/view/movie/[^/?]/[^/?]+\.html">[^<]+</a>)',`
[aznude] Add new extractor 2017-07-12 00:45:29 +00:00			`webpage,`
			`url,`
			`default=None)`

			`if (artist is not None) and (work is not None):`
Updated aznude.py - Consolidated Movie and Celeb IE into a single CollectionIE - Added support for multiple formats 2017-07-18 13:12:05 +05:30			`title = artist + ' in ' + work`
[aznude] Add new extractor 2017-07-12 00:45:29 +00:00			`else:`
			`title = self._og_search_title(webpage)`

Updated aznude.py 2017-07-28 06:18:22 +05:30			`if numeric_id != "":`
			`title = title + ' - ' + numeric_id`

[aznude] Add new extractor 2017-07-12 00:45:29 +00:00			`return {`
			`'id': video_id,`
Updated aznude.py 2017-07-28 06:18:22 +05:30			`'title': title,`
[aznude] Add new extractor 2017-07-12 00:45:29 +00:00			`'description': self._og_search_description(webpage),`
			`'thumbnail': self._og_search_thumbnail(webpage),`
Updated aznude.py - Consolidated Movie and Celeb IE into a single CollectionIE - Added support for multiple formats 2017-07-18 13:12:05 +05:30			`'formats': parsed_formats`
[aznude] Add new extractor 2017-07-12 00:45:29 +00:00			`}`


Updated aznude.py - Consolidated Movie and Celeb IE into a single CollectionIE - Added support for multiple formats 2017-07-18 13:12:05 +05:30			`class AZNudeCollectionIE(InfoExtractor):`
			`IE_NAME = 'aznude:collection'`
Updated aznude.py 2017-07-28 06:18:22 +05:30			`_VALID_URL = r'https?://(?:www\.)?aznude\.com/(?:view/[^/]+/[^/]+\|browse/(?:videos\|tags/vids))/(?P<id>.+)\.html'`
Updated aznude.py - Consolidated Movie and Celeb IE into a single CollectionIE - Added support for multiple formats 2017-07-18 13:12:05 +05:30			`_TESTS = [ {`
[aznude] Add new extractor 2017-07-12 00:45:29 +00:00			`'url': 'http://www.aznude.com/view/celeb/m/marisatomei.html',`
			`'info_dict': {`
Updated aznude.py - Consolidated Movie and Celeb IE into a single CollectionIE - Added support for multiple formats 2017-07-18 13:12:05 +05:30			`'title': 'Marisa Tomei Nude - Aznude ',`
			`'id': 'view/celeb/m/marisatomei.html',`
[aznude] Add new extractor 2017-07-12 00:45:29 +00:00			`},`
			`'playlist_mincount': 33,`
Updated aznude.py - Consolidated Movie and Celeb IE into a single CollectionIE - Added support for multiple formats 2017-07-18 13:12:05 +05:30			`}, {`
[aznude] Add new extractor 2017-07-12 00:45:29 +00:00			`'url': 'https://www.aznude.com/view/movie/l/loiteringwithintent.html',`
			`'info_dict': {`
Updated aznude.py - Consolidated Movie and Celeb IE into a single CollectionIE - Added support for multiple formats 2017-07-18 13:12:05 +05:30			`'title': 'Loitering With Intent Nude Scenes - Aznude',`
			`'id': 'view/movie/l/loiteringwithintent.html',`
[aznude] Add new extractor 2017-07-12 00:45:29 +00:00			`},`
			`'playlist_mincount': 2,`
Updated aznude.py - Consolidated Movie and Celeb IE into a single CollectionIE - Added support for multiple formats 2017-07-18 13:12:05 +05:30			`} ]`

			`def _real_extract(self, url):`
			`page_id = self._match_id(url)`
			`webpage = self._download_webpage(url, page_id)`
			`title = self._search_regex(r'(?:<title>)(?P<thetitle>.+)(?:</title>)', webpage, 'title', default=None).title()`

			`parse_result = urlparse(url)`
			`url_prefix = parse_result.scheme + '://' + parse_result.netloc`
[aznude] Add new extractor 2017-07-12 00:45:29 +00:00
Updated aznude.py - Consolidated Movie and Celeb IE into a single CollectionIE - Added support for multiple formats 2017-07-18 13:12:05 +05:30			`entries = []`
			`for path in re.findall(r'(?:<a[^>]+href=")(?P<url>[^"]+)(?:"[^>]+class="(?:[^"]+ )?show-clip(?:"\| [^"]+")[^>]*>)', webpage):`
Updated aznude.py 2017-07-28 06:18:22 +05:30			`if not path.startswith("//"):`
			`entries.append( self.url_result(urljoin(url_prefix, path), AZNudeIE.ie_key()) )`
[aznude] Add new extractor 2017-07-12 00:45:29 +00:00
Updated aznude.py - Consolidated Movie and Celeb IE into a single CollectionIE - Added support for multiple formats 2017-07-18 13:12:05 +05:30			`return self.playlist_result(entries, page_id, title)`