1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2026-06-10 14:45:18 +00:00

Compare commits

..

16 Commits

Author SHA1 Message Date
Philipp Hagemeister a71d1414eb release 2014.08.24.4 2014-08-24 06:42:05 +02:00
Philipp Hagemeister 423817c468 [expotv] Add new extractor (Fixes #3552) 2014-08-24 06:41:55 +02:00
Philipp Hagemeister 51ed9fce09 [pornotube] Modernize 2014-08-24 06:16:24 +02:00
Philipp Hagemeister d43aeb1d00 release 2014.08.24.3 2014-08-24 05:32:31 +02:00
Philipp Hagemeister 4d805e063c [generic] Automatic detection of flow player and age_limit (Fixes #3576) 2014-08-24 05:31:32 +02:00
Philipp Hagemeister 24e5e24166 release 2014.08.24.2 2014-08-24 04:47:38 +02:00
Philipp Hagemeister 4d54ef20a2 [ministrygrid] Add extractor (Fixes #2900) 2014-08-24 04:47:28 +02:00
Philipp Hagemeister 54036b3991 [wayofthemaster] Remove unused import 2014-08-24 04:18:09 +02:00
Philipp Hagemeister e5402ac120 [wayofthemaster] Add extractor (Fixes #3575) 2014-08-24 04:14:02 +02:00
Philipp Hagemeister f56f8399c7 [ebaumsworld] Remove spurious determine_ext 2014-08-24 03:37:19 +02:00
Philipp Hagemeister cf0c5fa3a1 [ebaumsworld] Modernize 2014-08-24 03:31:38 +02:00
Philipp Hagemeister 8c2ccefae6 release 2014.08.24.1 2014-08-24 03:20:40 +02:00
Philipp Hagemeister 1f8b6af773 [bip.tv] Allow underscore in lookup ids (Fixes #3573) 2014-08-24 03:20:31 +02:00
Philipp Hagemeister 8f9b683eeb [blip.tv] Add legacy test case
This was broken in the mean time, so add a test case to make sure it doesn't break silently again.
2014-08-24 03:13:58 +02:00
Philipp Hagemeister b5f4775b38 [arte.tv:creative] Fix test case 2014-08-24 03:11:00 +02:00
Philipp Hagemeister 01d906ffe9 [arte:creative] Support more URLs (fixes #3572) 2014-08-24 02:57:32 +02:00
11 changed files with 297 additions and 39 deletions
+3
View File
@@ -88,6 +88,7 @@ from .engadget import EngadgetIE
from .escapist import EscapistIE
from .everyonesmixtape import EveryonesMixtapeIE
from .exfm import ExfmIE
from .expotv import ExpoTVIE
from .extremetube import ExtremeTubeIE
from .facebook import FacebookIE
from .faz import FazIE
@@ -184,6 +185,7 @@ from .malemotion import MalemotionIE
from .mdr import MDRIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .ministrygrid import MinistryGridIE
from .mit import TechTVMITIE, MITIE, OCWMITIE
from .mitele import MiTeleIE
from .mixcloud import MixcloudIE
@@ -390,6 +392,7 @@ from .vuclip import VuClipIE
from .vulture import VultureIE
from .washingtonpost import WashingtonPostIE
from .wat import WatIE
from .wayofthemaster import WayOfTheMasterIE
from .wdr import (
WDRIE,
WDRMobileIE,
+15 -5
View File
@@ -177,16 +177,26 @@ class ArteTVPlus7IE(InfoExtractor):
# It also uses the arte_vp_url url from the webpage to extract the information
class ArteTVCreativeIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:creative'
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)'
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/(?:magazine?/)?(?P<id>[^?#]+)'
_TEST = {
_TESTS = [{
'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
'info_dict': {
'id': '050489-002',
'id': '72176',
'ext': 'mp4',
'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
'title': 'Folge 2 - Corporate Design',
'upload_date': '20131004',
},
}
}, {
'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion',
'info_dict': {
'id': '160676',
'ext': 'mp4',
'title': 'Monty Python live (mostly)',
'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n',
'upload_date': '20140805',
}
}]
class ArteTVFutureIE(ArteTVPlus7IE):
+17 -2
View File
@@ -15,7 +15,7 @@ from ..utils import (
class BlipTVIE(SubtitlesInfoExtractor):
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+]+)))'
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_TESTS]+)))'
_TESTS = [
{
@@ -49,6 +49,21 @@ class BlipTVIE(SubtitlesInfoExtractor):
'uploader_id': '792887',
'duration': 279,
}
},
{
# https://bugzilla.redhat.com/show_bug.cgi?id=967465
'url': 'http://a.blip.tv/api.swf#h6Uag5KbVwI',
'md5': '314e87b1ebe7a48fcbfdd51b791ce5a6',
'info_dict': {
'id': '6573122',
'ext': 'mov',
'upload_date': '20130520',
'description': 'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.',
'title': 'Red vs. Blue Season 11 Trailer',
'timestamp': 1369029609,
'uploader': 'redvsblue',
'uploader_id': '792887',
}
}
]
@@ -150,7 +165,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
class BlipTVUserIE(InfoExtractor):
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
_PAGE_SIZE = 12
IE_NAME = 'blip.tv:user'
+9 -8
View File
@@ -1,19 +1,21 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import determine_ext
class EbaumsWorldIE(InfoExtractor):
_VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)'
_TEST = {
u'url': u'http://www.ebaumsworld.com/video/watch/83367677/',
u'file': u'83367677.mp4',
u'info_dict': {
u'title': u'A Giant Python Opens The Door',
u'description': u'This is how nightmares start...',
u'uploader': u'jihadpizza',
'url': 'http://www.ebaumsworld.com/video/watch/83367677/',
'info_dict': {
'id': '83367677',
'ext': 'mp4',
'title': 'A Giant Python Opens The Door',
'description': 'This is how nightmares start...',
'uploader': 'jihadpizza',
},
}
@@ -28,7 +30,6 @@ class EbaumsWorldIE(InfoExtractor):
'id': video_id,
'title': config.find('title').text,
'url': video_url,
'ext': determine_ext(video_url),
'description': config.find('description').text,
'thumbnail': config.find('image').text,
'uploader': config.find('username').text,
+73
View File
@@ -0,0 +1,73 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
unified_strdate,
)
class ExpoTVIE(InfoExtractor):
_VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
_TEST = {
'url': 'http://www.expotv.com/videos/reviews/1/24/LinneCardscom/17561',
'md5': '2985e6d7a392b2f7a05e0ca350fe41d0',
'info_dict': {
'id': '17561',
'ext': 'mp4',
'upload_date': '20060212',
'title': 'My Favorite Online Scrapbook Store',
'view_count': int,
'description': 'You\'ll find most everything you need at this virtual store front.',
'uploader': 'Anna T.',
'thumbnail': 're:^https?://.*\.jpg$',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
player_key = self._search_regex(
r'<param name="playerKey" value="([^"]+)"', webpage, 'player key')
config_url = 'http://client.expotv.com/video/config/%s/%s' % (
video_id, player_key)
config = self._download_json(
config_url, video_id,
note='Downloading video configuration')
formats = [{
'url': fcfg['file'],
'height': int_or_none(fcfg.get('height')),
'format_note': fcfg.get('label'),
'ext': self._search_regex(
r'filename=.*\.([a-z0-9_A-Z]+)&', fcfg['file'],
'file extension', default=None),
} for fcfg in config['sources']]
self._sort_formats(formats)
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
thumbnail = config.get('image')
view_count = int_or_none(self._search_regex(
r'<h5>Plays: ([0-9]+)</h5>', webpage, 'view counts'))
uploader = self._search_regex(
r'<div class="reviewer">\s*<img alt="([^"]+)"', webpage, 'uploader',
fatal=False)
upload_date = unified_strdate(self._search_regex(
r'<h5>Reviewed on ([0-9/.]+)</h5>', webpage, 'upload date',
fatal=False))
return {
'id': video_id,
'formats': formats,
'title': title,
'description': description,
'view_count': view_count,
'thumbnail': thumbnail,
'uploader': uploader,
'upload_date': upload_date,
}
+45 -2
View File
@@ -22,6 +22,7 @@ from ..utils import (
smuggle_url,
unescapeHTML,
unified_strdate,
unsmuggle_url,
url_basename,
)
from .brightcove import BrightcoveIE
@@ -330,6 +331,18 @@ class GenericIE(InfoExtractor):
'info_dict': {
'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
}
},
# Flowplayer
{
'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
'md5': '9d65602bf31c6e20014319c7d07fba27',
'info_dict': {
'id': '5123ea6d5e5a7',
'ext': 'mp4',
'age_limit': 18,
'uploader': 'www.handjobhub.com',
'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub',
}
}
]
@@ -487,7 +500,14 @@ class GenericIE(InfoExtractor):
else:
assert ':' in default_search
return self.url_result(default_search + url)
video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
url, smuggled_data = unsmuggle_url(url)
force_videoid = None
if smuggled_data and 'force_videoid' in smuggled_data:
force_videoid = smuggled_data['force_videoid']
video_id = force_videoid
else:
video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
self.to_screen('%s: Requesting header' % video_id)
@@ -498,6 +518,9 @@ class GenericIE(InfoExtractor):
new_url = response.geturl()
if url != new_url:
self.report_following_redirect(new_url)
if force_videoid:
new_url = smuggle_url(
new_url, {'force_videoid': force_videoid})
return self.url_result(new_url)
# Check for direct link to a video
@@ -559,6 +582,16 @@ class GenericIE(InfoExtractor):
r'(?s)<title>(.*?)</title>', webpage, 'video title',
default='video')
# Try to detect age limit automatically
age_limit = self._rta_search(webpage)
# And then there are the jokers who advertise that they use RTA,
# but actually don't.
AGE_LIMIT_MARKERS = [
r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
]
if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
age_limit = 18
# video uploader is domain name
video_uploader = self._search_regex(
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
@@ -640,7 +673,7 @@ class GenericIE(InfoExtractor):
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
if mobj:
return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV')
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9]+)', webpage)
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
if mobj:
return self.url_result(mobj.group(1), 'BlipTV')
@@ -822,6 +855,15 @@ class GenericIE(InfoExtractor):
if not found:
# Broaden the findall a little bit: JWPlayer JS loader
found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
if not found:
# Flow player
found = re.findall(r'''(?xs)
flowplayer\("[^"]+",\s*
\{[^}]+?\}\s*,
\s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
["']?url["']?\s*:\s*["']([^"']+)["']
''', webpage)
assert found
if not found:
# Try to find twitter cards info
found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
@@ -873,6 +915,7 @@ class GenericIE(InfoExtractor):
'url': video_url,
'uploader': video_uploader,
'title': video_title,
'age_limit': age_limit,
})
if len(entries) == 1:
+57
View File
@@ -0,0 +1,57 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
smuggle_url,
)
class MinistryGridIE(InfoExtractor):
_VALID_URL = r'https?://www\.ministrygrid.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])'
_TEST = {
'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers',
'md5': '844be0d2a1340422759c2a9101bab017',
'info_dict': {
'id': '3453494717001',
'ext': 'mp4',
'title': 'The Gospel by Numbers',
'description': 'Coming soon from T4G 2014!',
'uploader': 'LifeWay Christian Resources (MG)',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
portlets_json = self._search_regex(
r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list')
portlets = json.loads(portlets_json)
pl_id = self._search_regex(
r'<!--\s*p_l_id - ([0-9]+)<br>', webpage, 'p_l_id')
for i, portlet in enumerate(portlets):
portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet)
portlet_code = self._download_webpage(
portlet_url, video_id,
note='Looking in portlet %s (%d/%d)' % (portlet, i + 1, len(portlets)),
fatal=False)
video_iframe_url = self._search_regex(
r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe',
default=None)
if video_iframe_url:
surl = smuggle_url(
video_iframe_url, {'force_videoid': video_id})
return {
'_type': 'url',
'id': video_id,
'url': surl,
}
raise ExtractorError('Could not find video iframe in any portlets')
+24 -21
View File
@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -9,15 +11,16 @@ from ..utils import (
class PornotubeIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
_VALID_URL = r'https?://(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
_TEST = {
u'url': u'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
u'file': u'1689755.flv',
u'md5': u'374dd6dcedd24234453b295209aa69b6',
u'info_dict': {
u"upload_date": u"20090708",
u"title": u"Marilyn-Monroe-Bathing",
u"age_limit": 18
'url': 'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
'md5': '374dd6dcedd24234453b295209aa69b6',
'info_dict': {
'id': '1689755',
'ext': 'flv',
'upload_date': '20090708',
'title': 'Marilyn-Monroe-Bathing',
'age_limit': 18
}
}
@@ -32,22 +35,22 @@ class PornotubeIE(InfoExtractor):
# Get the video URL
VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
video_url = self._search_regex(VIDEO_URL_RE, webpage, u'video url')
video_url = self._search_regex(VIDEO_URL_RE, webpage, 'video url')
video_url = compat_urllib_parse.unquote(video_url)
#Get the uploaded date
VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
if upload_date: upload_date = unified_strdate(upload_date)
upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, 'upload date', fatal=False)
if upload_date:
upload_date = unified_strdate(upload_date)
age_limit = self._rta_search(webpage)
info = {'id': video_id,
'url': video_url,
'uploader': None,
'upload_date': upload_date,
'title': video_title,
'ext': 'flv',
'format': 'flv',
'age_limit': age_limit}
return [info]
return {
'id': video_id,
'url': video_url,
'upload_date': upload_date,
'title': video_title,
'ext': 'flv',
'format': 'flv',
'age_limit': age_limit,
}
+52
View File
@@ -0,0 +1,52 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class WayOfTheMasterIE(InfoExtractor):
_VALID_URL = r'https?://www\.wayofthemaster\.com/([^/?#]*/)*(?P<id>[^/?#]+)\.s?html(?:$|[?#])'
_TEST = {
'url': 'http://www.wayofthemaster.com/hbks.shtml',
'md5': '5316b57487ada8480606a93cb3d18d24',
'info_dict': {
'id': 'hbks',
'ext': 'mp4',
'title': 'Intelligent Design vs. Evolution',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
title = self._search_regex(
r'<img src="images/title_[^"]+".*?alt="([^"]+)"',
webpage, 'title', default=None)
if title is None:
title = self._html_search_regex(
r'<title>(.*?)</title>', webpage, 'page title')
url_base = self._search_regex(
r'<param\s+name="?movie"?\s+value=".*?/wotm_videoplayer_highlow[0-9]*\.swf\?vid=([^"]+)"',
webpage, 'URL base')
formats = [{
'format_id': 'low',
'quality': 1,
'url': url_base + '_low.mp4',
}, {
'format_id': 'high',
'quality': 2,
'url': url_base + '_high.mp4',
}]
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'formats': formats,
}
+1
View File
@@ -855,6 +855,7 @@ def unified_strdate(date_str):
'%Y/%m/%d',
'%d.%m.%Y',
'%d/%m/%Y',
'%d/%m/%y',
'%Y/%m/%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S',
'%d.%m.%Y %H:%M',
+1 -1
View File
@@ -1,2 +1,2 @@
__version__ = '2014.08.24'
__version__ = '2014.08.24.4'