release 2014.04.01.2

[youtube] Transform google's JSON dialect (fixes #2663)
release 2014.04.01.1
2026-06-17 01:50:15 +00:00 · 2014-04-01 05:57:15 +02:00 · 2014-04-01 05:56:56 +02:00 · 2014-04-01 00:25:17 +02:00 · 2014-04-01 00:25:11 +02:00 · 2014-04-01 00:02:39 +02:00
9 changed files with 81 additions and 50 deletions
@@ -144,7 +144,15 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])

    def test_ComedyCentralShows(self):
-        self.assertMatch('http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
+            ['ComedyCentralShows'])

 if __name__ == '__main__':
    unittest.main()
@@ -256,9 +256,10 @@ def parseOpts(overrideArguments=None):
    general.add_option(
        '--bidi-workaround', dest='bidi_workaround', action='store_true',
        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
-    general.add_option('--default-search',
-            dest='default_search', metavar='PREFIX',
-            help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
+    general.add_option(
+        '--default-search',
+        dest='default_search', metavar='PREFIX',
+        help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
    general.add_option(
        '--ignore-config',
        action='store_true',
@@ -676,7 +677,7 @@ def _real_main(argv=None):
        date = DateRange.day(opts.date)
    else:
        date = DateRange(opts.dateafter, opts.datebefore)
-    if opts.default_search not in ('auto', None) and ':' not in opts.default_search:
+    if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search:
        parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')

    # Do not download videos when there are audio-only formats
@@ -43,11 +43,13 @@ class ComedyCentralShowsIE(InfoExtractor):
                          (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
                         (full-episodes/(?P<episode>.*)|
                          (?P<clip>
-                              (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
-                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
+                              (?:videos/[^/]+/(?P<videotitle>[^/?#]+))
+                              |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
+                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
+                          )|
                          (?P<interview>
                              extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
-                     $'''
+                     (?:[?#].*|$)'''
    _TEST = {
        'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
        'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
@@ -102,7 +104,9 @@ class ComedyCentralShowsIE(InfoExtractor):
            assert mobj is not None

        if mobj.group('clip'):
-            if mobj.group('showname') == 'thedailyshow':
+            if mobj.group('videotitle'):
+                epTitle = mobj.group('videotitle')
+            elif mobj.group('showname') == 'thedailyshow':
                epTitle = mobj.group('tdstitle')
            else:
                epTitle = mobj.group('cntitle')
@@ -316,13 +316,16 @@ class GenericIE(InfoExtractor):
        if not parsed_url.scheme:
            default_search = self._downloader.params.get('default_search')
            if default_search is None:
-                default_search = 'auto'
+                default_search = 'auto_warning'

-            if default_search == 'auto':
+            if default_search in ('auto', 'auto_warning'):
                if '/' in url:
                    self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
                    return self.url_result('http://' + url)
                else:
+                    if default_search == 'auto_warning':
+                        self._downloader.report_warning(
+                            'Falling back to youtube search for  %s . Set --default-search to "auto" to suppress this warning.' % url)
                    return self.url_result('ytsearch:' + url)
            else:
                assert ':' in default_search
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import os

@@ -5,45 +7,50 @@ from .common import InfoExtractor


 class PyvideoIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
-    _TESTS = [{
-        u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
-        u'file': u'24_4WWkSmNo.mp4',
-        u'md5': u'de317418c8bc76b1fd8633e4f32acbc6',
-        u'info_dict': {
-            u"title": u"Become a logging expert in 30 minutes",
-            u"description": u"md5:9665350d466c67fb5b1598de379021f7",
-            u"upload_date": u"20130320",
-            u"uploader": u"NextDayVideo",
-            u"uploader_id": u"NextDayVideo",
+    _VALID_URL = r'http://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
+
+    _TESTS = [
+        {
+            'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
+            'md5': 'de317418c8bc76b1fd8633e4f32acbc6',
+            'info_dict': {
+                'id': '24_4WWkSmNo',
+                'ext': 'mp4',
+                'title': 'Become a logging expert in 30 minutes',
+                'description': 'md5:9665350d466c67fb5b1598de379021f7',
+                'upload_date': '20130320',
+                'uploader': 'NextDayVideo',
+                'uploader_id': 'NextDayVideo',
+            },
+            'add_ie': ['Youtube'],
        },
-        u'add_ie': ['Youtube'],
-    },
-    {
-        u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
-        u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12',
-        u'info_dict': {
-            u'id': u'2542',
-            u'ext': u'm4v',
-            u'title': u'Gloriajw-SpotifyWithErikBernhardsson182',
+        {
+            'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
+            'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
+            'info_dict': {
+                'id': '2542',
+                'ext': 'm4v',
+                'title': 'Gloriajw-SpotifyWithErikBernhardsson182',
+            },
        },
-    },
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        webpage = self._download_webpage(url, video_id)
-        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)

+        webpage = self._download_webpage(url, video_id)
+
+        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
        if m_youtube is not None:
            return self.url_result(m_youtube.group(1), 'Youtube')

-        title = self._html_search_regex(r'<div class="section">.*?<h3>([^>]+?)</h3>',
-            webpage, u'title', flags=re.DOTALL)
-        video_url = self._search_regex([r'<source src="(.*?)"',
-            r'<dt>Download</dt>.*?<a href="(.+?)"'],
-            webpage, u'video url', flags=re.DOTALL)
+        title = self._html_search_regex(
+            r'<div class="section">.*?<h3>([^>]+?)</h3>', webpage, 'title', flags=re.DOTALL)
+        video_url = self._search_regex(
+            [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
+            webpage, 'video url', flags=re.DOTALL)
+
        return {
            'id': video_id,
            'title': os.path.splitext(title)[0],
@@ -20,8 +20,9 @@ class RutubeIE(InfoExtractor):

    _TEST = {
        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
-        'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4',
        'info_dict': {
+            'id': '3eac3b4561676c17df9132a9a1e62e3e',
+            'ext': 'mp4',
            'title': 'Раненный кенгуру забежал в аптеку',
            'description': 'http://www.ntdtv.ru ',
            'duration': 80,
@@ -39,12 +40,14 @@ class RutubeIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        
-        api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id,
-                                              video_id, 'Downloading video JSON')
+        api_response = self._download_webpage(
+            'http://rutube.ru/api/video/%s/?format=json' % video_id,
+            video_id, 'Downloading video JSON')
        video = json.loads(api_response)
        
-        api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
-                                              video_id, 'Downloading trackinfo JSON')
+        api_response = self._download_webpage(
+            'http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
+            video_id, 'Downloading trackinfo JSON')
        trackinfo = json.loads(api_response)
        
        # Some videos don't have the author field
@@ -116,7 +116,7 @@ class WDRIE(InfoExtractor):


 class WDRMausIE(InfoExtractor):
-    _VALID_URL = 'http://(?:www\.)?wdrmaus\.de/(?:extras/|sachgeschichten/sachgeschichten/)?(?P<id>[^/?#]+)(?:/index\.php5|\.php5|/(?:$|[?#]))'
+    _VALID_URL = 'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
    IE_DESC = 'Sendung mit der Maus'
    _TESTS = [{
        'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
@@ -1446,7 +1446,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                break

            more = self._download_json(
-                'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
+                'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+                'Downloading page #%s' % page_num,
+                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

@@ -1609,7 +1611,7 @@ class YoutubeUserIE(InfoExtractor):

 class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
-    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
+    _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    _MAX_RESULTS = 1000
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'
@@ -1620,9 +1622,12 @@ class YoutubeSearchIE(SearchInfoExtractor):
        video_ids = []
        pagenum = 0
        limit = n
+        PAGE_SIZE = 50

-        while (50 * pagenum) < limit:
-            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
+        while (PAGE_SIZE * pagenum) < limit:
+            result_url = self._API_URL % (
+                compat_urllib_parse.quote_plus(query.encode('utf-8')),
+                (PAGE_SIZE * pagenum) + 1)
            data_json = self._download_webpage(
                result_url, video_id=u'query "%s"' % query,
                note=u'Downloading page %s' % (pagenum + 1),
@@ -1,2 +1,2 @@

-__version__ = '2014.03.30'
+__version__ = '2014.04.01.2'
Author	SHA1	Message	Date
Philipp Hagemeister	c8fc3fb524	release 2014.04.01.2	2014-04-01 05:57:15 +02:00
Philipp Hagemeister	5912c639df	[youtube] Transform google's JSON dialect (fixes #2663)	2014-04-01 05:56:56 +02:00
Philipp Hagemeister	017e4dd58c	release 2014.04.01.1	2014-04-01 00:25:17 +02:00
Philipp Hagemeister	651486621d	[comedycentral] Allow URLs with query parts (fixes #2661)	2014-04-01 00:25:11 +02:00
Philipp Hagemeister	28d9032c88	release 2014.04.01	2014-04-01 00:02:39 +02:00
Philipp Hagemeister	16f4eb723a	[comedycentral] Add support for /videos URLs (Fixes #2660)	2014-04-01 00:02:32 +02:00
Sergey M․	1cbd410620	[pyvideo] Modernize	2014-03-31 19:31:48 +07:00
Philipp Hagemeister	d41ac5f5dc	release 2014.03.30.1	2014-03-30 15:57:47 +02:00
Philipp Hagemeister	9c1fc022ae	[generic] Warn before fallback to automatic search	2014-03-30 15:57:35 +02:00
Philipp Hagemeister	83d548ef0f	[youtube] Encode ytsearch query	2014-03-30 15:57:35 +02:00
Sergey M․	c72477bd32	[rutube] Modernize	2014-03-30 15:35:07 +07:00
Philipp Hagemeister	9a7b072e38	[wdr] Add support for more wdrmaus subpages	2014-03-30 07:42:35 +02:00