Merge remote-tracking branch 'upstream/master'

2025-07-13 13:56:20 +00:00 · 2014-08-13 04:22:45 -07:00 · 2014-08-13 04:22:45 -07:00 · f96252b913
commit f96252b913
parent 04b89c9026 6f600ff5d6
15 changed files with 165 additions and 81 deletions
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@ -99,6 +99,7 @@ class TestAllURLsMatching(unittest.TestCase):
    def test_facebook_matching(self):
        self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
        self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
    def test_no_duplicates(self):
        ies = gen_extractors()
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -280,7 +280,7 @@ class TestUtil(unittest.TestCase):
        d = json.loads(stripped)
        self.assertEqual(d, [{"id": "532cb", "x": 3}])
-    def test_uppercase_escpae(self):
+    def test_uppercase_escape(self):
        self.assertEqual(uppercase_escape(u'aä'), u'aä')
        self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')
--- a/youtube_dl/init.py
+++ b/youtube_dl/init.py
@ -68,6 +68,7 @@ __authors__  = (
    'Hassaan Ali',
    'Dobrosław Żybort',
    'David Fabijan',
    'Sebastian Haas',
 )
 __license__ = 'Public Domain'
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@ -295,7 +295,7 @@ class FileDownloader(object):
    def real_download(self, filename, info_dict):
        """Real download process. Redefine in subclasses."""
-        raise NotImplementedError(u'This method must be implemented by sublcasses')
+        raise NotImplementedError(u'This method must be implemented by subclasses')
    def _hook_progress(self, status):
        for ph in self._progress_hooks:
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -225,9 +225,12 @@ from .nrk import (
 from .ntv import NTVIE
 from .nytimes import NYTimesIE
 from .nuvid import NuvidIE
 from .oe1 import OE1IE
 from .ooyala import OoyalaIE
-from .orf import ORFIE
+from .orf import (
    ORFTVthekIE,
    ORFOE1IE,
    ORFFM4IE,
 )
 from .parliamentliveuk import ParliamentLiveUKIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@ -6,6 +6,7 @@ import json
 from .common import InfoExtractor
 from ..utils import (
    compat_urlparse,
    int_or_none,
 )
@ -110,8 +111,8 @@ class AppleTrailersIE(InfoExtractor):
                formats.append({
                    'url': format_url,
                    'format': format['type'],
-                    'width': format['width'],
+                    'width': int_or_none(format['width']),
-                    'height': int(format['height']),
+                    'height': int_or_none(format['height']),
                })
            self._sort_formats(formats)
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@ -51,6 +51,9 @@ class ARDIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
        if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
            raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
        title = self._html_search_regex(
            [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
             r'<meta name="dcterms.title" content="(.*?)"/>',
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@ -109,15 +109,19 @@ class ArteTVPlus7IE(InfoExtractor):
            regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
            return any(re.match(r, f['versionCode']) for r in regexes)
        # Some formats may not be in the same language as the url
        # TODO: Instead of dropping videos that do not match the requested language,
        # we might want to keep them and process those formats with lower precedence
        formats = filter(_match_lang, all_formats)
-        formats = list(formats) # in python3 filter returns an iterator
+        formats = list(formats)  # in python3 filter returns an iterator
        if not formats:
            # Some videos are only available in the 'Originalversion'
            # they aren't tagged as being in French or German
-            if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats):
+            # Sometimes there are neither videos of requested lang code
-                formats = all_formats
+            # nor original version videos available
-            else:
+            # For such cases we just take all_formats as is
-                raise ExtractorError(u'The formats list is empty')
+            formats = all_formats
            if not formats:
                raise ExtractorError('The formats list is empty')
        if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
            def sort_key(f):
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@ -20,7 +20,7 @@ from ..utils import (
 class FacebookIE(InfoExtractor):
    _VALID_URL = r'''(?x)
        https?://(?:\w+\.)?facebook\.com/
-        (?:[^#?]*\#!/)?
+        (?:[^#]*?\#!/)?
        (?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
        (?:v|video_id)=(?P<id>[0-9]+)
        (?:.*)'''
--- a/youtube_dl/extractor/oe1.py
+++ b/youtube_dl/extractor/oe1.py
@ -1,40 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import calendar
 import datetime
 import re
 from .common import InfoExtractor
 # audios on oe1.orf.at are only available for 7 days, so we can't
 # add tests.
 class OE1IE(InfoExtractor):
    IE_DESC = 'oe1.orf.at'
    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        show_id = mobj.group('id')
        data = self._download_json(
            'http://oe1.orf.at/programm/%s/konsole' % show_id,
            show_id
        )
        timestamp = datetime.datetime.strptime('%s %s' % (
            data['item']['day_label'],
            data['item']['time']
        ), '%d.%m.%Y %H:%M')
        unix_timestamp = calendar.timegm(timestamp.utctimetuple())
        return {
            'id': show_id,
            'title': data['item']['title'],
            'url': data['item']['url_stream'],
            'ext': 'mp3',
            'description': data['item'].get('info'),
            'timestamp': unix_timestamp
        }
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@ -3,23 +3,38 @@ import re
 import json
 from .common import InfoExtractor
-from ..utils import unescapeHTML
+from ..utils import (
    unescapeHTML,
    ExtractorError,
 )
 class OoyalaIE(InfoExtractor):
    _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'
-    _TEST = {
+    _TESTS = [
-        # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
+        {
-        'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+            # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
-        'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
+            'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
-        'info_dict': {
+            'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
-            'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+            'info_dict': {
-            'ext': 'mp4',
+                'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
-            'title': 'Explaining Data Recovery from Hard Drives and SSDs',
+                'ext': 'mp4',
-            'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
+                'title': 'Explaining Data Recovery from Hard Drives and SSDs',
                'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
            },
        }, {
            # Only available for ipad
            'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
            'md5': '4b9754921fddb68106e48c142e2a01e6',
            'info_dict': {
                'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
                'ext': 'mp4',
                'title': 'Simulation Overview - Levels of Simulation',
                'description': '',
            },
        },
-    }
+    ]
    @staticmethod
    def _url_for_embed_code(embed_code):
@ -47,13 +62,30 @@ class OoyalaIE(InfoExtractor):
        player = self._download_webpage(player_url, embedCode)
        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
                                        player, 'mobile player url')
-        mobile_player = self._download_webpage(mobile_url, embedCode)
+        # Looks like some videos are only available for particular devices
-        videos_info = self._search_regex(
+        # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
-            r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
+        # is only available for ipad)
-            mobile_player, 'info').replace('\\"','"')
+        # Working around with fetching URLs for all the devices found starting with 'unknown'
-        videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"')
+        # until we succeed or eventually fail for each device.
        devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
        devices.remove('unknown')
        devices.insert(0, 'unknown')
        for device in devices:
            mobile_player = self._download_webpage(
                '%s&device=%s' % (mobile_url, device), embedCode,
                'Downloading mobile player JS for %s device' % device)
            videos_info = self._search_regex(
                r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
                mobile_player, 'info', fatal=False, default=None)
            if videos_info:
                break
        if not videos_info:
            raise ExtractorError('Unable to extract info')
        videos_info = videos_info.replace('\\"', '"')
        videos_more_info = self._search_regex(
            r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
        videos_info = json.loads(videos_info)
-        videos_more_info =json.loads(videos_more_info)
+        videos_more_info = json.loads(videos_more_info)
        if videos_more_info.get('lineup'):
            videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@ -3,6 +3,8 @@ from __future__ import unicode_literals
 import json
 import re
 import calendar
 import datetime
 from .common import InfoExtractor
 from ..utils import (
@ -12,7 +14,9 @@ from ..utils import (
 )
-class ORFIE(InfoExtractor):
+class ORFTVthekIE(InfoExtractor):
    IE_NAME = 'orf:tvthek'
    IE_DESC = 'ORF TVthek'
    _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'
    _TEST = {
@ -105,3 +109,73 @@ class ORFIE(InfoExtractor):
            'entries': entries,
            'id': playlist_id,
        }
 # Audios on ORF radio are only available for 7 days, so we can't add tests.
 class ORFOE1IE(InfoExtractor):
    """Extractor for programme pages on oe1.orf.at.

    The numeric programme id from the URL is used to fetch the matching
    'konsole' JSON document, from which a single mp3 entry is built.
    """
    IE_NAME = 'orf:oe1'
    IE_DESC = 'Radio Österreich 1'
    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'
    def _real_extract(self, url):
        """Return the info dict for the programme addressed by *url*."""
        show_id = re.match(self._VALID_URL, url).group('id')
        console_url = 'http://oe1.orf.at/programm/%s/konsole' % show_id
        item = self._download_json(console_url, show_id)['item']
        # The day label and time are joined and parsed as 'DD.MM.YYYY HH:MM'.
        broadcast_label = '%s %s' % (item['day_label'], item['time'])
        broadcast_time = datetime.datetime.strptime(
            broadcast_label, '%d.%m.%Y %H:%M')
        # timegm() interprets the time tuple as UTC and yields epoch seconds.
        unix_timestamp = calendar.timegm(broadcast_time.utctimetuple())
        result = {
            'id': show_id,
            'title': item['title'],
            'url': item['url_stream'],
            'ext': 'mp3',
            # 'info' is optional in the JSON; a missing key yields None.
            'description': item.get('info'),
            'timestamp': unix_timestamp,
        }
        return result
 class ORFFM4IE(InfoExtractor):
    """Extractor for FM4 '7tage' broadcast URLs on fm4.orf.at.

    A matched URL carries a date and a show key in its fragment; both are
    used to query the audioapi.orf.at broadcasts JSON, and every stream
    listed there becomes one entry of the returned playlist.
    """
    # Extractor name. This was previously a first IE_DESC assignment that
    # the following line overwrote, leaving IE_NAME unset.
    IE_NAME = 'orf:fm4'
    IE_DESC = 'radio FM4'
    _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'
    def _real_extract(self, url):
        """Return a playlist dict for the broadcast addressed by *url*."""
        mobj = re.match(self._VALID_URL, url)
        show_date = mobj.group('date')
        show_id = mobj.group('show')
        data = self._download_json(
            'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id),
            show_id
        )
        def extract_entry_dict(info, title, subtitle):
            """Build one playlist entry from a single item of data['streams']."""
            return {
                # The entry id is the loop stream id without its '.mp3' suffix.
                'id': info['loopStreamId'].replace('.mp3', ''),
                'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'],
                'title': title,
                'description': subtitle,
                # NOTE(review): 'start'/'end' are presumably millisecond
                # epoch values, hence the division by 1000 - confirm
                # against the API.
                'duration': (info['end'] - info['start']) / 1000,
                'timestamp': info['start'] / 1000,
                'ext': 'mp3'
            }
        # Every stream of the broadcast shares the show's title and subtitle.
        entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]
        return {
            '_type': 'playlist',
            'id': show_id,
            'title': data['title'],
            'description': data['subtitle'],
            'entries': entries
        }
--- a/youtube_dl/extractor/reverbnation.py
+++ b/youtube_dl/extractor/reverbnation.py
@ -1,23 +1,23 @@
 from __future__ import unicode_literals
 import re
 import time
 from .common import InfoExtractor
-from ..utils import strip_jsonp
+from ..utils import str_or_none
 class ReverbNationIE(InfoExtractor):
    _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
    _TESTS = [{
        'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
        'file': '16965047.mp3',
        'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
        'info_dict': {
            "id": "16965047",
            "ext": "mp3",
            "title": "MONA LISA",
            "uploader": "ALKILADOS",
-            "uploader_id": 216429,
+            "uploader_id": "216429",
-            "thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg"
+            "thumbnail": "re:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$"
        },
    }]
@ -26,10 +26,8 @@ class ReverbNationIE(InfoExtractor):
        song_id = mobj.group('id')
        api_res = self._download_json(
-            'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d'
+            'https://api.reverbnation.com/song/%s' % song_id,
                % (song_id, int(time.time() * 1000)),
            song_id,
            transform_source=strip_jsonp,
            note='Downloading information of song %s' % song_id
        )
@ -38,8 +36,9 @@ class ReverbNationIE(InfoExtractor):
            'title': api_res.get('name'),
            'url': api_res.get('url'),
            'uploader': api_res.get('artist', {}).get('name'),
-            'uploader_id': api_res.get('artist', {}).get('id'),
+            'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
-            'thumbnail': api_res.get('image', api_res.get('thumbnail')),
+            'thumbnail': self._proto_relative_url(
                api_res.get('image', api_res.get('thumbnail'))),
            'ext': 'mp3',
            'vcodec': 'none',
        }
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -1273,9 +1273,15 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    if v == '':
        v = None
    return default if v is None else (int(v) * invscale // scale)
 def str_or_none(v, default=None):
    """Return *default* when *v* is None, otherwise compat_str(v)."""
    if v is None:
        return default
    return compat_str(v)
 def str_to_int(int_str):
    if int_str is None:
        return None
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,2 +1,2 @@
-__version__ = '2014.08.05'
+__version__ = '2014.08.10'
`@ -1,2 +1,2 @@`

	`__version__ = '2014.08.05'`	`__version__ = '2014.08.10'`