1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2026-06-11 23:20:15 +00:00

Compare commits

..

17 Commits

Author SHA1 Message Date
Philipp Hagemeister 492641d10a release 2014.08.22.3 2014-08-22 18:41:43 +02:00
Philipp Hagemeister 2b9faf5542 [rtve] Add support for live stream
At the moment, only RTVE-1 seems to work flawlessly.
-2 seems geoblocked right now.
-TDP doesn't seem to be available outside of Spain.
2014-08-22 18:40:28 +02:00
Philipp Hagemeister ed2d6a1960 [generic] Simplify playlist support (#2948) 2014-08-22 18:19:56 +02:00
Philipp Hagemeister be843678b1 [YouTubeDL] Correct handling of age_limit = None in result 2014-08-22 17:46:57 +02:00
Philipp Hagemeister c71dfccc98 Merge remote-tracking branch 'anovicecodemonkey/generic-data-video-url'
Conflicts:
	youtube_dl/extractor/generic.py
2014-08-22 17:40:36 +02:00
Philipp Hagemeister 1a9ccac7c1 Merge remote-tracking branch 'origin/master' 2014-08-22 17:38:11 +02:00
Philipp Hagemeister e330d59abb [playfm] Add extractor (Fixes #3538) 2014-08-22 17:38:06 +02:00
Sergey M․ 394df6d7d0 [nuvid] Adapt to latest layout changes 2014-08-22 21:41:51 +07:00
Philipp Hagemeister 218f754940 [README] Add thumbnail to _TEST example
While it's not mandatory, extractors are highly encouraged to provide a thumbnail field.
2014-08-22 11:30:49 +02:00
Philipp Hagemeister a053c3493a [test_YoutubeDL] Reorder formats (#3542) 2014-08-22 03:44:30 +02:00
Philipp Hagemeister 50b294aab8 release 2014.08.22.2 2014-08-22 03:16:16 +02:00
Philipp Hagemeister 756b046f3e [pbs] recognize class=partnerPlayer as well (Fixes #3564) 2014-08-22 03:16:08 +02:00
Philipp Hagemeister 388ac0b18a release 2014.08.22.1 2014-08-22 03:02:49 +02:00
anovicecodemonkey 37e3cbe22e Move duplicate check to generic.py 2014-06-01 01:16:35 +09:30
anovicecodemonkey 610134730a Add a _TEST_ 2014-05-21 19:25:37 +09:30
anovicecodemonkey 212a5e28ba Add a duplicate check to /extractor/common.py playlist_result function 2014-05-21 19:04:55 +09:30
anovicecodemonkey 3442b30ab2 [generic] Support data-video-url for YouTube embeds (Fixes #2862) 2014-05-18 23:15:09 +09:30
11 changed files with 259 additions and 71 deletions
+1
View File
@@ -429,6 +429,7 @@ If you want to add support for a new site, you can follow this quick list (assum
'id': '42',
'ext': 'mp4',
'title': 'Video title goes here',
'thumbnail': 're:^https?://.*\.jpg$',
# TODO more properties, either as:
# * A value
# * MD5 checksum; start the string with md5:
+1 -1
View File
@@ -221,7 +221,7 @@ class TestFormatSelection(unittest.TestCase):
'138', '137', '248', '136', '247', '135', '246',
'245', '244', '134', '243', '133', '242', '160',
# Dash audio
'141', '172', '140', '139', '171',
'141', '172', '140', '171', '139',
]
for f1id, f2id in zip(order, order[1:]):
+4 -1
View File
@@ -480,7 +480,10 @@ class YoutubeDL(object):
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
age_limit = self.params.get('age_limit')
if age_limit is not None:
if age_limit < info_dict.get('age_limit', 0):
actual_age_limit = info_dict.get('age_limit')
if actual_age_limit is None:
actual_age_limit = 0
if age_limit < actual_age_limit:
return 'Skipping "' + title + '" because it is age restricted'
if self.in_download_archive(info_dict):
return '%s has already been recorded in archive' % video_title
+2 -1
View File
@@ -243,6 +243,7 @@ from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
from .playfm import PlayFMIE
from .playvid import PlayvidIE
from .podomatic import PodomaticIE
from .pornhd import PornHdIE
@@ -263,7 +264,7 @@ from .rtbf import RTBFIE
from .rtlnl import RtlXlIE
from .rtlnow import RTLnowIE
from .rts import RTSIE
from .rtve import RTVEALaCartaIE
from .rtve import RTVEALaCartaIE, RTVELiveIE
from .ruhd import RUHDIE
from .rutube import (
RutubeIE,
+30 -12
View File
@@ -16,6 +16,7 @@ from ..utils import (
ExtractorError,
HEADRequest,
orderedSet,
parse_xml,
smuggle_url,
unescapeHTML,
@@ -289,6 +290,22 @@ class GenericIE(InfoExtractor):
'description': 'Mario\'s life in the fast lane has never looked so good.',
},
},
# YouTube embed via <data-embed-url="">
{
'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
'info_dict': {
'id': 'jpSGZsgga_I',
'ext': 'mp4',
'title': 'Asphalt 8: Airborne - Launch Trailer',
'uploader': 'Gameloft',
'uploader_id': 'gameloft',
'upload_date': '20130821',
'description': 'md5:87bd95f13d8be3e7da87a5f2c443106a',
},
'params': {
'skip_download': True,
}
}
]
def report_download_webpage(self, video_id):
@@ -479,6 +496,12 @@ class GenericIE(InfoExtractor):
video_uploader = self._search_regex(
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
# Helper method
def _playlist_from_matches(matches, getter, ie=None):
urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches)
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
# Look for BrightCove:
bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
if bc_urls:
@@ -514,6 +537,7 @@ class GenericIE(InfoExtractor):
matches = re.findall(r'''(?x)
(?:
<iframe[^>]+?src=|
data-video-url=|
<embed[^>]+?src=|
embedSWF\(?:\s*
)
@@ -522,19 +546,15 @@ class GenericIE(InfoExtractor):
(?:embed|v)/.+?)
\1''', webpage)
if matches:
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
for tuppl in matches]
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
return _playlist_from_matches(
matches, lambda m: unescapeHTML(m[1]), ie='Youtube')
# Look for embedded Dailymotion player
matches = re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
if matches:
urlrs = [self.url_result(unescapeHTML(tuppl[1]))
for tuppl in matches]
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
return _playlist_from_matches(
matches, lambda m: unescapeHTML(m[1]))
# Look for embedded Wistia player
match = re.search(
@@ -648,10 +668,8 @@ class GenericIE(InfoExtractor):
# Look for funnyordie embed
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
if matches:
urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie')
for eurl in matches]
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
return _playlist_from_matches(
matches, getter=unescapeHTML, ie='FunnyOrDie')
# Look for embedded RUTV player
rutv_url = RUTVIE._extract_url(webpage)
+13 -8
View File
@@ -38,7 +38,7 @@ class NuvidIE(InfoExtractor):
webpage = self._download_webpage(
request, video_id, 'Downloading %s page' % format_id)
video_url = self._html_search_regex(
r'<a href="([^"]+)"\s*>Continue to watch video', webpage, '%s video URL' % format_id, fatal=False)
r'<a\s+href="([^"]+)"\s+class="b_link">', webpage, '%s video URL' % format_id, fatal=False)
if not video_url:
continue
formats.append({
@@ -49,19 +49,24 @@ class NuvidIE(InfoExtractor):
webpage = self._download_webpage(
'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
title = self._html_search_regex(
r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', webpage, 'title').strip()
thumbnail = self._html_search_regex(
r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
webpage, 'thumbnail URL', fatal=False)
[r'<span title="([^"]+)">',
r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>'], webpage, 'title').strip()
thumbnails = [
{
'url': thumb_url,
} for thumb_url in re.findall(r'<img src="([^"]+)" alt="" />', webpage)
]
thumbnail = thumbnails[0]['url'] if thumbnails else None
duration = parse_duration(self._html_search_regex(
r'Length:\s*<span>(\d{2}:\d{2})</span>',webpage, 'duration', fatal=False))
r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})', webpage, 'duration', fatal=False))
upload_date = unified_strdate(self._html_search_regex(
r'Added:\s*<span>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload date', fatal=False))
r'<i class="fa fa-user"></i>\s*(\d{4}-\d{2}-\d{2})', webpage, 'upload date', fatal=False))
return {
'id': video_id,
'title': title,
'thumbnail': 'http://m.nuvid.com%s' % thumbnail,
'thumbnails': thumbnails,
'thumbnail': thumbnail,
'duration': duration,
'upload_date': upload_date,
'age_limit': 18,
+13 -1
View File
@@ -54,6 +54,18 @@ class PBSIE(InfoExtractor):
'duration': 801,
},
},
{
'url': 'http://www.pbs.org/wnet/gperf/dudamel-conducts-verdi-requiem-hollywood-bowl-full-episode/3374/',
'md5': 'c62859342be2a0358d6c9eb306595978',
'info_dict': {
'id': '2365297708',
'ext': 'mp4',
'description': 'md5:68d87ef760660eb564455eb30ca464fe',
'title': 'Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
'duration': 6559,
'thumbnail': 're:^https?://.*\.jpg$',
}
}
]
def _extract_ids(self, url):
@@ -75,7 +87,7 @@ class PBSIE(InfoExtractor):
return media_id, presumptive_id
url = self._search_regex(
r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
r'<iframe\s+(?:class|id)=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
webpage, 'player URL')
mobj = re.match(self._VALID_URL, url)
+82
View File
@@ -0,0 +1,82 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
float_or_none,
int_or_none,
)
class PlayFMIE(InfoExtractor):
IE_NAME = 'play.fm'
_VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])'
_TEST = {
'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220',
'md5': 'c505f8307825a245d0c7ad1850001f22',
'info_dict': {
'id': '137220',
'ext': 'mp3',
'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
'uploader': 'Sven Tasnadi',
'uploader_id': 'sventasnadi',
'duration': 5627.428,
'upload_date': '20140712',
'view_count': int,
'thumbnail': 're:^https?://.*\.jpg$',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
upload_date = mobj.group('upload_date')
rec_data = compat_urllib_parse.urlencode({'rec_id': video_id})
req = compat_urllib_request.Request(
'http://www.play.fm/flexRead/recording', data=rec_data)
req.add_header('Content-Type', 'application/x-www-form-urlencoded')
rec_doc = self._download_xml(req, video_id)
error_node = rec_doc.find('./error')
if error_node is not None:
raise ExtractorError('An error occured: %s (code %s)' % (
error_node.text, rec_doc.find('./status').text))
recording = rec_doc.find('./recording')
title = recording.find('./title').text
view_count = int_or_none(recording.find('./stats/playcount').text)
duration = float_or_none(recording.find('./duration').text, scale=1000)
thumbnail = recording.find('./image').text
artist = recording.find('./artists/artist')
uploader = artist.find('./name').text
uploader_id = artist.find('./slug').text
video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % (
'http:', recording.find('./url').text,
recording.find('./_class').text, recording.find('./file_id').text,
rec_doc.find('./uuid').text, video_id,
rec_doc.find('./jingle/file_id').text,
'http%3A%2F%2Fwww.play.fm%2Fplayer',
)
return {
'id': video_id,
'url': video_url,
'ext': 'mp3',
'filesize': int_or_none(recording.find('./size').text),
'title': title,
'upload_date': upload_date,
'view_count': view_count,
'duration': duration,
'thumbnail': thumbnail,
'uploader': uploader,
'uploader_id': uploader_id,
}
+106 -46
View File
@@ -1,21 +1,66 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
import base64
import re
import time
from .common import InfoExtractor
from ..utils import (
struct_unpack,
remove_end,
)
def _decrypt_url(png):
encrypted_data = base64.b64decode(png)
text_index = encrypted_data.find(b'tEXt')
text_chunk = encrypted_data[text_index - 4:]
length = struct_unpack('!I', text_chunk[:4])[0]
# Use bytearray to get integers when iterating in both python 2.x and 3.x
data = bytearray(text_chunk[8:8 + length])
data = [chr(b) for b in data if b != 0]
hash_index = data.index('#')
alphabet_data = data[:hash_index]
url_data = data[hash_index + 1:]
alphabet = []
e = 0
d = 0
for l in alphabet_data:
if d == 0:
alphabet.append(l)
d = e = (e + 1) % 4
else:
d -= 1
url = ''
f = 0
e = 3
b = 1
for letter in url_data:
if f == 0:
l = int(letter) * 10
f = 1
else:
if e == 0:
l += int(letter)
url += alphabet[l]
e = (b + 3) % 4
f = 0
b += 1
else:
e -= 1
return url
class RTVEALaCartaIE(InfoExtractor):
IE_NAME = 'rtve.es:alacarta'
IE_DESC = 'RTVE a la carta'
_VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
_TEST = {
_TESTS = [{
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
'info_dict': {
@@ -23,48 +68,15 @@ class RTVEALaCartaIE(InfoExtractor):
'ext': 'mp4',
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
},
}
def _decrypt_url(self, png):
encrypted_data = base64.b64decode(png)
text_index = encrypted_data.find(b'tEXt')
text_chunk = encrypted_data[text_index-4:]
length = struct_unpack('!I', text_chunk[:4])[0]
# Use bytearray to get integers when iterating in both python 2.x and 3.x
data = bytearray(text_chunk[8:8+length])
data = [chr(b) for b in data if b != 0]
hash_index = data.index('#')
alphabet_data = data[:hash_index]
url_data = data[hash_index+1:]
alphabet = []
e = 0
d = 0
for l in alphabet_data:
if d == 0:
alphabet.append(l)
d = e = (e + 1) % 4
else:
d -= 1
url = ''
f = 0
e = 3
b = 1
for letter in url_data:
if f == 0:
l = int(letter)*10
f = 1
else:
if e == 0:
l += int(letter)
url += alphabet[l]
e = (b + 3) % 4
f = 0
b += 1
else:
e -= 1
return url
}, {
'note': 'Live stream',
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
'info_dict': {
'id': '1694255',
'ext': 'flv',
'title': 'TODO',
}
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -74,11 +86,59 @@ class RTVEALaCartaIE(InfoExtractor):
video_id)['page']['items'][0]
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
png = self._download_webpage(png_url, video_id, 'Downloading url information')
video_url = self._decrypt_url(png)
video_url = _decrypt_url(png)
return {
'id': video_id,
'title': info['title'],
'url': video_url,
'thumbnail': info['image'],
'thumbnail': info.get('image'),
'page_url': url,
}
class RTVELiveIE(InfoExtractor):
IE_NAME = 'rtve.es:live'
IE_DESC = 'RTVE.es live streams'
_VALID_URL = r'http://www\.rtve\.es/(?:deportes/directo|noticias(?=/directo-la-1)|television)/(?P<id>[a-zA-Z0-9-]+)'
_TESTS = [{
'url': 'http://www.rtve.es/noticias/directo-la-1/',
'info_dict': {
'id': 'directo-la-1',
'ext': 'flv',
'title': 're:^La 1 de TVE [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
},
'params': {
'skip_download': 'live stream',
}
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
start_time = time.gmtime()
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
player_url = self._search_regex(
r'<param name="movie" value="([^"]+)"/>', webpage, 'player URL')
title = remove_end(self._og_search_title(webpage), ' en directo')
title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)
vidplayer_id = self._search_regex(
r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
png = self._download_webpage(png_url, video_id, 'Downloading url information')
video_url = _decrypt_url(png)
print(video_url)
return {
'id': video_id,
'ext': 'flv',
'title': title,
'url': video_url,
'app': 'rtve-live-live?ovpfv=2.1.2',
'player_url': player_url,
'rtmp_live': True,
}
+6
View File
@@ -1285,6 +1285,12 @@ def remove_start(s, start):
return s
def remove_end(s, end):
if s.endswith(end):
return s[:-len(end)]
return s
def url_basename(url):
path = compat_urlparse.urlparse(url).path
return path.strip(u'/').split(u'/')[-1]
+1 -1
View File
@@ -1,2 +1,2 @@
__version__ = '2014.08.22'
__version__ = '2014.08.22.3'