From be8786a6a46f43be54daa85d6c70091948175f34 Mon Sep 17 00:00:00 2001 From: danut007ro Date: Wed, 26 Sep 2012 21:00:28 +0300 Subject: [PATCH 1/3] Every extractor also return it's name. --- youtube_dl/InfoExtractors.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index ddb4aa16b..d2b9fbedc 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -445,6 +445,7 @@ class YoutubeIE(InfoExtractor): video_extension = self._video_extensions.get(format_param, 'flv') results.append({ + 'provider': IE_NAME, 'id': video_id.decode('utf-8'), 'url': video_real_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), @@ -578,6 +579,7 @@ class MetacafeIE(InfoExtractor): video_uploader = mobj.group(1) return [{ + 'provider': IE_NAME, 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), @@ -657,6 +659,7 @@ class DailymotionIE(InfoExtractor): video_uploader = mobj.group(1) return [{ + 'provider': IE_NAME, 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), @@ -752,6 +755,7 @@ class GoogleIE(InfoExtractor): video_thumbnail = '' return [{ + 'provider': IE_NAME, 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': u'NA', @@ -819,6 +823,7 @@ class PhotobucketIE(InfoExtractor): video_uploader = mobj.group(2).decode('utf-8') return [{ + 'provider': IE_NAME, 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader, @@ -961,6 +966,7 @@ class YahooIE(InfoExtractor): video_url = unescapeHTML(video_url) return [{ + 'provider': IE_NAME, 'id': video_id.decode('utf-8'), 'url': video_url, 'uploader': video_uploader, @@ -1065,6 +1071,7 @@ class VimeoIE(InfoExtractor): %(video_id, sig, timestamp, quality, video_codec.upper()) return [{ + 'provider': IE_NAME, 'id': video_id, 'url': video_url, 'uploader': 
video_uploader, @@ -1213,6 +1220,7 @@ class GenericIE(InfoExtractor): video_uploader = mobj.group(1).decode('utf-8') return [{ + 'provider': IE_NAME, 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader, @@ -1769,6 +1777,7 @@ class DepositFilesIE(InfoExtractor): file_title = mobj.group(1).decode('utf-8') return [{ + 'provider': IE_NAME, 'id': file_id.decode('utf-8'), 'url': file_url.decode('utf-8'), 'uploader': u'NA', @@ -1973,6 +1982,7 @@ class FacebookIE(InfoExtractor): video_extension = self._video_extensions.get(format_param, 'mp4') results.append({ + 'provider': IE_NAME, 'id': video_id.decode('utf-8'), 'url': video_real_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), @@ -2024,6 +2034,7 @@ class BlipTVIE(InfoExtractor): ext = ext.replace('.', '') self.report_direct_download(title) info = { + 'provider': IE_NAME, 'id': title, 'url': url, 'title': title, @@ -2055,6 +2066,7 @@ class BlipTVIE(InfoExtractor): ext = umobj.group(1) info = { + 'provider': IE_NAME, 'id': data['item_id'], 'url': video_url, 'uploader': data['display_name'], @@ -2124,6 +2136,7 @@ class MyVideoIE(InfoExtractor): video_title = mobj.group(1) return [{ + 'provider': IE_NAME, 'id': video_id, 'url': video_url, 'uploader': u'NA', @@ -2250,6 +2263,7 @@ class ComedyCentralIE(InfoExtractor): effTitle = showId + u'-' + epTitle info = { + 'provider': IE_NAME, 'id': shortMediaId, 'url': video_url, 'uploader': showId, @@ -2326,6 +2340,7 @@ class EscapistIE(InfoExtractor): videoUrl = playlist[1]['url'] info = { + 'provider': IE_NAME, 'id': videoId, 'url': videoUrl, 'uploader': showName, @@ -2377,6 +2392,7 @@ class CollegeHumorIE(InfoExtractor): internal_video_id = m.group('internalvideoid') info = { + 'provider': IE_NAME, 'id': video_id, 'internal_id': internal_video_id, } @@ -2462,6 +2478,7 @@ class XVideosIE(InfoExtractor): video_thumbnail = mobj.group(0).decode('utf-8') info = { + 'provider': IE_NAME, 'id': video_id, 'url': video_url, 
'uploader': None, @@ -2559,6 +2576,7 @@ class SoundcloudIE(InfoExtractor): request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers) return [{ + 'provider': IE_NAME, 'id': video_id.decode('utf-8'), 'url': mediaURL, 'uploader': uploader.decode('utf-8'), @@ -2628,6 +2646,7 @@ class InfoQIE(InfoExtractor): video_id, extension = video_filename.split('.') info = { + 'provider': IE_NAME, 'id': video_id, 'url': video_url, 'uploader': None, @@ -2743,6 +2762,7 @@ class MixcloudIE(InfoExtractor): format_param = req_format return [{ + 'provider': IE_NAME, 'id': file_id.decode('utf-8'), 'url': file_url.decode('utf-8'), 'uploader': uploader.decode('utf-8'), @@ -2779,6 +2799,7 @@ class StanfordOpenClassroomIE(InfoExtractor): course = mobj.group('course') video = mobj.group('video') info = { + 'provider': IE_NAME, 'id': course + '_' + video, } @@ -2803,6 +2824,7 @@ class StanfordOpenClassroomIE(InfoExtractor): elif mobj.group('course'): # A course page course = mobj.group('course') info = { + 'provider': IE_NAME, 'id': course, 'type': 'playlist', } @@ -2839,6 +2861,7 @@ class StanfordOpenClassroomIE(InfoExtractor): else: # Root page info = { + 'provider': IE_NAME, 'id': 'Stanford OpenClassroom', 'type': 'playlist', } @@ -2946,6 +2969,7 @@ class MTVIE(InfoExtractor): return info = { + 'provider': IE_NAME, 'id': video_id, 'url': video_url, 'uploader': performer, From 3cd98c7894d2523752230d9fa3307e6e39723218 Mon Sep 17 00:00:00 2001 From: danut007ro Date: Thu, 27 Sep 2012 00:07:20 +0300 Subject: [PATCH 2/3] Removed provider (mistake) and add provider parameter to process_info --- youtube_dl/FileDownloader.py | 4 ++-- youtube_dl/InfoExtractors.py | 24 ------------------------ 2 files changed, 2 insertions(+), 26 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 14e872a98..1c5eeeabe 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -341,7 +341,7 @@ class FileDownloader(object): return 
u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' return None - def process_info(self, info_dict): + def process_info(self, info_dict, provider): """Process a single dictionary returned by an InfoExtractor.""" info_dict['stitle'] = sanitize_filename(info_dict['title']) @@ -476,7 +476,7 @@ class FileDownloader(object): for video in videos or []: try: self.increment_downloads() - self.process_info(video) + self.process_info(video, ie.IE_NAME) except UnavailableVideoError: self.trouble(u'\nERROR: unable to download video') diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index e17b0529e..d95423f7d 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -445,7 +445,6 @@ class YoutubeIE(InfoExtractor): video_extension = self._video_extensions.get(format_param, 'flv') results.append({ - 'provider': IE_NAME, 'id': video_id.decode('utf-8'), 'url': video_real_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), @@ -579,7 +578,6 @@ class MetacafeIE(InfoExtractor): video_uploader = mobj.group(1) return [{ - 'provider': IE_NAME, 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), @@ -659,7 +657,6 @@ class DailymotionIE(InfoExtractor): video_uploader = mobj.group(1) return [{ - 'provider': IE_NAME, 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), @@ -755,7 +752,6 @@ class GoogleIE(InfoExtractor): video_thumbnail = '' return [{ - 'provider': IE_NAME, 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': u'NA', @@ -823,7 +819,6 @@ class PhotobucketIE(InfoExtractor): video_uploader = mobj.group(2).decode('utf-8') return [{ - 'provider': IE_NAME, 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader, @@ -966,7 +961,6 @@ class YahooIE(InfoExtractor): video_url = unescapeHTML(video_url) return [{ - 'provider': IE_NAME, 'id': 
video_id.decode('utf-8'), 'url': video_url, 'uploader': video_uploader, @@ -1071,7 +1065,6 @@ class VimeoIE(InfoExtractor): %(video_id, sig, timestamp, quality, video_codec.upper()) return [{ - 'provider': IE_NAME, 'id': video_id, 'url': video_url, 'uploader': video_uploader, @@ -1220,7 +1213,6 @@ class GenericIE(InfoExtractor): video_uploader = mobj.group(1).decode('utf-8') return [{ - 'provider': IE_NAME, 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader, @@ -1777,7 +1769,6 @@ class DepositFilesIE(InfoExtractor): file_title = mobj.group(1).decode('utf-8') return [{ - 'provider': IE_NAME, 'id': file_id.decode('utf-8'), 'url': file_url.decode('utf-8'), 'uploader': u'NA', @@ -1982,7 +1973,6 @@ class FacebookIE(InfoExtractor): video_extension = self._video_extensions.get(format_param, 'mp4') results.append({ - 'provider': IE_NAME, 'id': video_id.decode('utf-8'), 'url': video_real_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), @@ -2034,7 +2024,6 @@ class BlipTVIE(InfoExtractor): ext = ext.replace('.', '') self.report_direct_download(title) info = { - 'provider': IE_NAME, 'id': title, 'url': url, 'title': title, @@ -2066,7 +2055,6 @@ class BlipTVIE(InfoExtractor): ext = umobj.group(1) info = { - 'provider': IE_NAME, 'id': data['item_id'], 'url': video_url, 'uploader': data['display_name'], @@ -2136,7 +2124,6 @@ class MyVideoIE(InfoExtractor): video_title = mobj.group(1) return [{ - 'provider': IE_NAME, 'id': video_id, 'url': video_url, 'uploader': u'NA', @@ -2263,7 +2250,6 @@ class ComedyCentralIE(InfoExtractor): effTitle = showId + u'-' + epTitle info = { - 'provider': IE_NAME, 'id': shortMediaId, 'url': video_url, 'uploader': showId, @@ -2340,7 +2326,6 @@ class EscapistIE(InfoExtractor): videoUrl = playlist[1]['url'] info = { - 'provider': IE_NAME, 'id': videoId, 'url': videoUrl, 'uploader': showName, @@ -2392,7 +2377,6 @@ class CollegeHumorIE(InfoExtractor): internal_video_id = m.group('internalvideoid') 
info = { - 'provider': IE_NAME, 'id': video_id, 'internal_id': internal_video_id, } @@ -2478,7 +2462,6 @@ class XVideosIE(InfoExtractor): video_thumbnail = mobj.group(0).decode('utf-8') info = { - 'provider': IE_NAME, 'id': video_id, 'url': video_url, 'uploader': None, @@ -2576,7 +2559,6 @@ class SoundcloudIE(InfoExtractor): request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers) return [{ - 'provider': IE_NAME, 'id': video_id.decode('utf-8'), 'url': mediaURL, 'uploader': uploader.decode('utf-8'), @@ -2646,7 +2628,6 @@ class InfoQIE(InfoExtractor): video_id, extension = video_filename.split('.') info = { - 'provider': IE_NAME, 'id': video_id, 'url': video_url, 'uploader': None, @@ -2762,7 +2743,6 @@ class MixcloudIE(InfoExtractor): format_param = req_format return [{ - 'provider': IE_NAME, 'id': file_id.decode('utf-8'), 'url': file_url.decode('utf-8'), 'uploader': uploader.decode('utf-8'), @@ -2799,7 +2779,6 @@ class StanfordOpenClassroomIE(InfoExtractor): course = mobj.group('course') video = mobj.group('video') info = { - 'provider': IE_NAME, 'id': course + '_' + video, } @@ -2824,7 +2803,6 @@ class StanfordOpenClassroomIE(InfoExtractor): elif mobj.group('course'): # A course page course = mobj.group('course') info = { - 'provider': IE_NAME, 'id': course, 'type': 'playlist', } @@ -2861,7 +2839,6 @@ class StanfordOpenClassroomIE(InfoExtractor): else: # Root page info = { - 'provider': IE_NAME, 'id': 'Stanford OpenClassroom', 'type': 'playlist', } @@ -2969,7 +2946,6 @@ class MTVIE(InfoExtractor): return info = { - 'provider': IE_NAME, 'id': video_id, 'url': video_url, 'uploader': performer, From ae16f68f4abd67bf9b0386537620ca17c342ff98 Mon Sep 17 00:00:00 2001 From: danut007ro Date: Thu, 27 Sep 2012 00:35:31 +0300 Subject: [PATCH 3/3] Provider (youtube, etc) is now saved in info_dict, so template filename can be something like %(provider)s_%(id)s.%(ext)s This can be useful because videos should also be identified by their providers 
since IDs can be the same on multiple providers. --- youtube_dl/FileDownloader.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 1c5eeeabe..793fc3daf 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -341,7 +341,7 @@ class FileDownloader(object): return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' return None - def process_info(self, info_dict, provider): + def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" info_dict['stitle'] = sanitize_filename(info_dict['title']) @@ -475,8 +475,9 @@ class FileDownloader(object): videos = ie.extract(url) for video in videos or []: try: + video['provider'] = ie.IE_NAME self.increment_downloads() - self.process_info(video, ie.IE_NAME) + self.process_info(video) except UnavailableVideoError: self.trouble(u'\nERROR: unable to download video')