mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2026-06-15 00:50:13 +00:00
Compare commits
21 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 0f175a932f | |||
| 849b269273 | |||
| 95fa5fb569 | |||
| 77c3c5c5ed | |||
| 159444a668 | |||
| f9befee1f5 | |||
| 9471c44405 | |||
| 013bfdd84c | |||
| 46fd0dd5a5 | |||
| 4698f0d858 | |||
| 355d074ff9 | |||
| 7da224c907 | |||
| 1723edb1a5 | |||
| 4740864508 | |||
| 09a42738fc | |||
| df928d500f | |||
| a72cbfacf0 | |||
| 62a164e713 | |||
| 5f58165def | |||
| bd4e40df1a | |||
| 1419fafd36 |
@@ -173,7 +173,6 @@ from .jadorecettepub import JadoreCettePubIE
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jove import JoveIE
|
||||
from .jukebox import JukeboxIE
|
||||
from .justintv import JustinTVIE
|
||||
from .jpopsukitv import JpopsukiIE
|
||||
from .kankan import KankanIE
|
||||
from .keezmovies import KeezMoviesIE
|
||||
@@ -316,6 +315,7 @@ from .sbs import SBSIE
|
||||
from .scivee import SciVeeIE
|
||||
from .screencast import ScreencastIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sexykarma import SexyKarmaIE
|
||||
from .shared import SharedIE
|
||||
from .sharesix import ShareSixIE
|
||||
from .sina import SinaIE
|
||||
@@ -395,6 +395,7 @@ from .tutv import TutvIE
|
||||
from .tvigle import TvigleIE
|
||||
from .tvp import TvpIE
|
||||
from .tvplay import TVPlayIE
|
||||
from .twitch import TwitchIE
|
||||
from .ubu import UbuIE
|
||||
from .udemy import (
|
||||
UdemyIE,
|
||||
|
||||
@@ -160,6 +160,9 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
format_id = '%s-%s' % (quality, format_info['versionCode'])
|
||||
else:
|
||||
format_id = quality
|
||||
media_type = format_info.get('mediaType')
|
||||
if media_type is not None:
|
||||
format_id += '-%s' % media_type
|
||||
info = {
|
||||
'format_id': format_id,
|
||||
'format_note': format_info.get('versionLibelle'),
|
||||
|
||||
@@ -87,6 +87,15 @@ class BrightcoveIE(InfoExtractor):
|
||||
'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
|
||||
},
|
||||
},
|
||||
{
|
||||
# playlist test
|
||||
# from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players
|
||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
|
||||
'info_dict': {
|
||||
'title': 'Sealife',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
},
|
||||
]
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -281,6 +281,12 @@ class InfoExtractor(object):
|
||||
raw_filename = basen + '.dump'
|
||||
filename = sanitize_filename(raw_filename, restricted=True)
|
||||
self.to_screen('Saving request to ' + filename)
|
||||
# Working around MAX_PATH limitation on Windows (see
|
||||
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
|
||||
if os.name == 'nt':
|
||||
absfilepath = os.path.abspath(filename)
|
||||
if len(absfilepath) > 259:
|
||||
filename = '\\\\?\\' + absfilepath
|
||||
with open(filename, 'wb') as outf:
|
||||
outf.write(webpage_bytes)
|
||||
|
||||
|
||||
@@ -34,6 +34,8 @@ class CondeNastIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
|
||||
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
|
||||
|
||||
EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed)/.+?' % '|'.join(_SITES.keys())
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
|
||||
'md5': '1921f713ed48aabd715691f774c451f7',
|
||||
|
||||
@@ -28,6 +28,7 @@ from .brightcove import BrightcoveIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .rutv import RUTVIE
|
||||
from .smotri import SmotriIE
|
||||
from .condenast import CondeNastIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -608,13 +609,13 @@ class GenericIE(InfoExtractor):
|
||||
if mobj:
|
||||
player_url = unescapeHTML(mobj.group('url'))
|
||||
surl = smuggle_url(player_url, {'Referer': url})
|
||||
return self.url_result(surl, 'Vimeo')
|
||||
return self.url_result(surl)
|
||||
|
||||
# Look for embedded (swf embed) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
||||
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
||||
if mobj:
|
||||
return self.url_result(mobj.group(1), 'Vimeo')
|
||||
return self.url_result(mobj.group(1))
|
||||
|
||||
# Look for embedded YouTube player
|
||||
matches = re.findall(r'''(?x)
|
||||
@@ -653,15 +654,17 @@ class GenericIE(InfoExtractor):
|
||||
match = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
|
||||
if match:
|
||||
embed_url = self._proto_relative_url(
|
||||
unescapeHTML(match.group('url')))
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': unescapeHTML(match.group('url')),
|
||||
'url': embed_url,
|
||||
'ie_key': 'Wistia',
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'id': video_id,
|
||||
}
|
||||
match = re.search(r'(?:id=["\']wistia_|data-wistiaid=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
|
||||
match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
|
||||
if match:
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
@@ -847,6 +850,12 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'MLB')
|
||||
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
|
||||
|
||||
def check_video(vurl):
|
||||
vpath = compat_urlparse.urlparse(vurl).path
|
||||
vext = determine_ext(vpath)
|
||||
|
||||
@@ -1,155 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
ExtractorError,
|
||||
formatSeconds,
|
||||
)
|
||||
|
||||
|
||||
class JustinTVIE(InfoExtractor):
|
||||
"""Information extractor for justin.tv and twitch.tv"""
|
||||
# TODO: One broadcast may be split into multiple videos. The key
|
||||
# 'broadcast_id' is the same for all parts, and 'broadcast_part'
|
||||
# starts at 1 and increases. Can we treat all parts as one video?
|
||||
|
||||
_VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
|
||||
(?:
|
||||
(?P<channelid>[^/]+)|
|
||||
(?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
|
||||
(?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
|
||||
)
|
||||
/?(?:\#.*)?$
|
||||
"""
|
||||
_JUSTIN_PAGE_LIMIT = 100
|
||||
IE_NAME = 'justin.tv'
|
||||
IE_DESC = 'justin.tv and twitch.tv'
|
||||
_TEST = {
|
||||
'url': 'http://www.twitch.tv/thegamedevhub/b/296128360',
|
||||
'md5': 'ecaa8a790c22a40770901460af191c9a',
|
||||
'info_dict': {
|
||||
'id': '296128360',
|
||||
'ext': 'flv',
|
||||
'upload_date': '20110927',
|
||||
'uploader_id': 25114803,
|
||||
'uploader': 'thegamedevhub',
|
||||
'title': 'Beginner Series - Scripting With Python Pt.1'
|
||||
}
|
||||
}
|
||||
|
||||
# Return count of items, list of *valid* items
|
||||
def _parse_page(self, url, video_id, counter):
|
||||
info_json = self._download_webpage(
|
||||
url, video_id,
|
||||
'Downloading video info JSON on page %d' % counter,
|
||||
'Unable to download video info JSON %d' % counter)
|
||||
|
||||
response = json.loads(info_json)
|
||||
if type(response) != list:
|
||||
error_text = response.get('error', 'unknown error')
|
||||
raise ExtractorError('Justin.tv API: %s' % error_text)
|
||||
info = []
|
||||
for clip in response:
|
||||
video_url = clip['video_file_url']
|
||||
if video_url:
|
||||
video_extension = os.path.splitext(video_url)[1][1:]
|
||||
video_date = re.sub('-', '', clip['start_time'][:10])
|
||||
video_uploader_id = clip.get('user_id', clip.get('channel_id'))
|
||||
video_id = clip['id']
|
||||
video_title = clip.get('title', video_id)
|
||||
info.append({
|
||||
'id': compat_str(video_id),
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
'uploader': clip.get('channel_name', video_uploader_id),
|
||||
'uploader_id': video_uploader_id,
|
||||
'upload_date': video_date,
|
||||
'ext': video_extension,
|
||||
})
|
||||
return (len(response), info)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
api_base = 'http://api.justin.tv'
|
||||
paged = False
|
||||
if mobj.group('channelid'):
|
||||
paged = True
|
||||
video_id = mobj.group('channelid')
|
||||
api = api_base + '/channel/archives/%s.json' % video_id
|
||||
elif mobj.group('chapterid'):
|
||||
chapter_id = mobj.group('chapterid')
|
||||
|
||||
webpage = self._download_webpage(url, chapter_id)
|
||||
m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
|
||||
if not m:
|
||||
raise ExtractorError('Cannot find archive of a chapter')
|
||||
archive_id = m.group(1)
|
||||
|
||||
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
|
||||
doc = self._download_xml(
|
||||
api, chapter_id,
|
||||
note='Downloading chapter information',
|
||||
errnote='Chapter information download failed')
|
||||
for a in doc.findall('.//archive'):
|
||||
if archive_id == a.find('./id').text:
|
||||
break
|
||||
else:
|
||||
raise ExtractorError('Could not find chapter in chapter information')
|
||||
|
||||
video_url = a.find('./video_file_url').text
|
||||
video_ext = video_url.rpartition('.')[2] or 'flv'
|
||||
|
||||
chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
|
||||
chapter_info = self._download_json(
|
||||
chapter_api_url, 'c' + chapter_id,
|
||||
note='Downloading chapter metadata',
|
||||
errnote='Download of chapter metadata failed')
|
||||
|
||||
bracket_start = int(doc.find('.//bracket_start').text)
|
||||
bracket_end = int(doc.find('.//bracket_end').text)
|
||||
|
||||
# TODO determine start (and probably fix up file)
|
||||
# youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
|
||||
#video_url += '?start=' + TODO:start_timestamp
|
||||
# bracket_start is 13290, but we want 51670615
|
||||
self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
|
||||
'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
|
||||
|
||||
info = {
|
||||
'id': 'c' + chapter_id,
|
||||
'url': video_url,
|
||||
'ext': video_ext,
|
||||
'title': chapter_info['title'],
|
||||
'thumbnail': chapter_info['preview'],
|
||||
'description': chapter_info['description'],
|
||||
'uploader': chapter_info['channel']['display_name'],
|
||||
'uploader_id': chapter_info['channel']['name'],
|
||||
}
|
||||
return info
|
||||
else:
|
||||
video_id = mobj.group('videoid')
|
||||
api = api_base + '/broadcast/by_archive/%s.json' % video_id
|
||||
|
||||
entries = []
|
||||
offset = 0
|
||||
limit = self._JUSTIN_PAGE_LIMIT
|
||||
for counter in itertools.count(1):
|
||||
page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
|
||||
page_count, page_info = self._parse_page(
|
||||
page_url, video_id, counter)
|
||||
entries.extend(page_info)
|
||||
if not paged or page_count != limit:
|
||||
break
|
||||
offset += limit
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': video_id,
|
||||
'entries': entries,
|
||||
}
|
||||
@@ -33,22 +33,22 @@ class MixcloudIE(InfoExtractor):
|
||||
},
|
||||
}
|
||||
|
||||
def check_urls(self, url_list):
|
||||
"""Returns 1st active url from list"""
|
||||
for url in url_list:
|
||||
def _get_url(self, track_id, template_url):
|
||||
server_count = 30
|
||||
for i in range(server_count):
|
||||
url = template_url % i
|
||||
try:
|
||||
# We only want to know if the request succeed
|
||||
# don't download the whole file
|
||||
self._request_webpage(HEADRequest(url), None, False)
|
||||
self._request_webpage(
|
||||
HEADRequest(url), track_id,
|
||||
'Checking URL %d/%d ...' % (i + 1, server_count + 1))
|
||||
return url
|
||||
except ExtractorError:
|
||||
url = None
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
def _get_url(self, template_url):
|
||||
return self.check_urls(template_url % i for i in range(30))
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
uploader = mobj.group(1)
|
||||
@@ -61,11 +61,11 @@ class MixcloudIE(InfoExtractor):
|
||||
r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
|
||||
song_url = preview_url.replace('/previews/', '/c/originals/')
|
||||
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
|
||||
final_song_url = self._get_url(template_url)
|
||||
final_song_url = self._get_url(track_id, template_url)
|
||||
if final_song_url is None:
|
||||
self.to_screen('Trying with m4a extension')
|
||||
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
|
||||
final_song_url = self._get_url(template_url)
|
||||
final_song_url = self._get_url(track_id, template_url)
|
||||
if final_song_url is None:
|
||||
raise ExtractorError('Unable to extract track url')
|
||||
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class SexyKarmaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?sexykarma\.com/gonewild/video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html',
|
||||
'md5': 'b9798e7d1ef1765116a8f516c8091dbd',
|
||||
'info_dict': {
|
||||
'id': 'yHI70cOyIHt',
|
||||
'display_id': 'taking-a-quick-pee',
|
||||
'ext': 'mp4',
|
||||
'title': 'Taking a quick pee.',
|
||||
'description': '',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'wildginger7',
|
||||
'upload_date': '20141007',
|
||||
'duration': 81,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'categories': list,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
|
||||
'md5': 'dd216c68d29b49b12842b9babe762a5d',
|
||||
'info_dict': {
|
||||
'id': '8Id6EZPbuHf',
|
||||
'display_id': 'pot-pixie-tribute',
|
||||
'ext': 'mp4',
|
||||
'title': 'pot_pixie tribute',
|
||||
'description': 'tribute',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'banffite',
|
||||
'upload_date': '20141013',
|
||||
'duration': 16,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'categories': list,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'<p>Save this video to your computer: </p><p><a href="([^"]+)"',
|
||||
webpage, 'url')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h2 class="he2"><span>(.*?)</span>',
|
||||
webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'description', fatal=False, default='')
|
||||
thumbnail = self._html_search_regex(
|
||||
r'<span id="container"><img\s+src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'class="aupa">\s*(.*?)</a>',
|
||||
webpage, 'uploader')
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False))
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
categories = self._html_search_meta(
|
||||
'keywords', webpage, 'categories',
|
||||
fatal=False, default='').split(',')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'categories': categories,
|
||||
}
|
||||
@@ -65,6 +65,22 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
'title': 'Who are the hackers?',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}, {
|
||||
# contains a youtube video
|
||||
'url': 'https://www.ted.com/talks/douglas_adams_parrots_the_universe_and_everything',
|
||||
'add_ie': ['Youtube'],
|
||||
'info_dict': {
|
||||
'id': '_ZG8HBuDjgc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Douglas Adams: Parrots the Universe and Everything',
|
||||
'description': 'md5:01ad1e199c49ac640cb1196c0e9016af',
|
||||
'uploader': 'University of California Television (UCTV)',
|
||||
'uploader_id': 'UCtelevision',
|
||||
'upload_date': '20080522',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_NATIVE_FORMATS = {
|
||||
@@ -114,6 +130,13 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
|
||||
talk_info = self._extract_info(webpage)['talks'][0]
|
||||
|
||||
if talk_info.get('external') is not None:
|
||||
self.to_screen('Found video from %s' % talk_info['external']['service'])
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': talk_info['external']['uri'],
|
||||
}
|
||||
|
||||
formats = [{
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
|
||||
@@ -0,0 +1,187 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class TwitchIE(InfoExtractor):
|
||||
# TODO: One broadcast may be split into multiple videos. The key
|
||||
# 'broadcast_id' is the same for all parts, and 'broadcast_part'
|
||||
# starts at 1 and increases. Can we treat all parts as one video?
|
||||
_VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/
|
||||
(?:
|
||||
(?P<channelid>[^/]+)|
|
||||
(?:(?:[^/]+)/b/(?P<videoid>[^/]+))|
|
||||
(?:(?:[^/]+)/c/(?P<chapterid>[^/]+))
|
||||
)
|
||||
/?(?:\#.*)?$
|
||||
"""
|
||||
_PAGE_LIMIT = 100
|
||||
_API_BASE = 'https://api.twitch.tv'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.twitch.tv/riotgames/b/577357806',
|
||||
'info_dict': {
|
||||
'id': 'a577357806',
|
||||
'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
'url': 'http://www.twitch.tv/acracingleague/c/5285812',
|
||||
'info_dict': {
|
||||
'id': 'c5285812',
|
||||
'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
'url': 'http://www.twitch.tv/vanillatv',
|
||||
'info_dict': {
|
||||
'id': 'vanillatv',
|
||||
'title': 'VanillaTV',
|
||||
},
|
||||
'playlist_mincount': 412,
|
||||
}]
|
||||
|
||||
def _handle_error(self, response):
|
||||
if not isinstance(response, dict):
|
||||
return
|
||||
error = response.get('error')
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
|
||||
expected=True)
|
||||
|
||||
def _download_json(self, url, video_id, note='Downloading JSON metadata'):
|
||||
response = super(TwitchIE, self)._download_json(url, video_id, note)
|
||||
self._handle_error(response)
|
||||
return response
|
||||
|
||||
def _extract_media(self, item, item_id):
|
||||
ITEMS = {
|
||||
'a': 'video',
|
||||
'c': 'chapter',
|
||||
}
|
||||
info = self._extract_info(self._download_json(
|
||||
'%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
|
||||
'Downloading %s info JSON' % ITEMS[item]))
|
||||
response = self._download_json(
|
||||
'%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id,
|
||||
'Downloading %s playlist JSON' % ITEMS[item])
|
||||
entries = []
|
||||
chunks = response['chunks']
|
||||
qualities = list(chunks.keys())
|
||||
for num, fragment in enumerate(zip(*chunks.values()), start=1):
|
||||
formats = []
|
||||
for fmt_num, fragment_fmt in enumerate(fragment):
|
||||
format_id = qualities[fmt_num]
|
||||
fmt = {
|
||||
'url': fragment_fmt['url'],
|
||||
'format_id': format_id,
|
||||
'quality': 1 if format_id == 'live' else 0,
|
||||
}
|
||||
m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
|
||||
if m:
|
||||
fmt['height'] = int(m.group('height'))
|
||||
formats.append(fmt)
|
||||
self._sort_formats(formats)
|
||||
entry = dict(info)
|
||||
entry['id'] = '%s_%d' % (entry['id'], num)
|
||||
entry['title'] = '%s part %d' % (entry['title'], num)
|
||||
entry['formats'] = formats
|
||||
entries.append(entry)
|
||||
return self.playlist_result(entries, info['id'], info['title'])
|
||||
|
||||
def _extract_info(self, info):
|
||||
return {
|
||||
'id': info['_id'],
|
||||
'title': info['title'],
|
||||
'description': info['description'],
|
||||
'duration': info['length'],
|
||||
'thumbnail': info['preview'],
|
||||
'uploader': info['channel']['display_name'],
|
||||
'uploader_id': info['channel']['name'],
|
||||
'timestamp': parse_iso8601(info['recorded_at']),
|
||||
'view_count': info['views'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj.group('chapterid'):
|
||||
return self._extract_media('c', mobj.group('chapterid'))
|
||||
|
||||
"""
|
||||
webpage = self._download_webpage(url, chapter_id)
|
||||
m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
|
||||
if not m:
|
||||
raise ExtractorError('Cannot find archive of a chapter')
|
||||
archive_id = m.group(1)
|
||||
|
||||
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
|
||||
doc = self._download_xml(
|
||||
api, chapter_id,
|
||||
note='Downloading chapter information',
|
||||
errnote='Chapter information download failed')
|
||||
for a in doc.findall('.//archive'):
|
||||
if archive_id == a.find('./id').text:
|
||||
break
|
||||
else:
|
||||
raise ExtractorError('Could not find chapter in chapter information')
|
||||
|
||||
video_url = a.find('./video_file_url').text
|
||||
video_ext = video_url.rpartition('.')[2] or 'flv'
|
||||
|
||||
chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
|
||||
chapter_info = self._download_json(
|
||||
chapter_api_url, 'c' + chapter_id,
|
||||
note='Downloading chapter metadata',
|
||||
errnote='Download of chapter metadata failed')
|
||||
|
||||
bracket_start = int(doc.find('.//bracket_start').text)
|
||||
bracket_end = int(doc.find('.//bracket_end').text)
|
||||
|
||||
# TODO determine start (and probably fix up file)
|
||||
# youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
|
||||
#video_url += '?start=' + TODO:start_timestamp
|
||||
# bracket_start is 13290, but we want 51670615
|
||||
self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
|
||||
'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
|
||||
|
||||
info = {
|
||||
'id': 'c' + chapter_id,
|
||||
'url': video_url,
|
||||
'ext': video_ext,
|
||||
'title': chapter_info['title'],
|
||||
'thumbnail': chapter_info['preview'],
|
||||
'description': chapter_info['description'],
|
||||
'uploader': chapter_info['channel']['display_name'],
|
||||
'uploader_id': chapter_info['channel']['name'],
|
||||
}
|
||||
return info
|
||||
"""
|
||||
elif mobj.group('videoid'):
|
||||
return self._extract_media('a', mobj.group('videoid'))
|
||||
elif mobj.group('channelid'):
|
||||
channel_id = mobj.group('channelid')
|
||||
info = self._download_json(
|
||||
'%s/kraken/channels/%s' % (self._API_BASE, channel_id),
|
||||
channel_id, 'Downloading channel info JSON')
|
||||
channel_name = info.get('display_name') or info.get('name')
|
||||
entries = []
|
||||
offset = 0
|
||||
limit = self._PAGE_LIMIT
|
||||
for counter in itertools.count(1):
|
||||
response = self._download_json(
|
||||
'%s/kraken/channels/%s/videos/?offset=%d&limit=%d'
|
||||
% (self._API_BASE, channel_id, offset, limit),
|
||||
channel_id, 'Downloading channel videos JSON page %d' % counter)
|
||||
videos = response['videos']
|
||||
if not videos:
|
||||
break
|
||||
entries.extend([self.url_result(video['url'], 'Twitch') for video in videos])
|
||||
offset += limit
|
||||
return self.playlist_result(entries, channel_id, channel_name)
|
||||
@@ -157,6 +157,18 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'duration': 62,
|
||||
}
|
||||
},
|
||||
{
|
||||
# from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
|
||||
'url': 'https://player.vimeo.com/video/98044508',
|
||||
'note': 'The js code contains assignments to the same variable as the config',
|
||||
'info_dict': {
|
||||
'id': '98044508',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pier Solar OUYA Official Trailer',
|
||||
'uploader': 'Tulio Gonçalves',
|
||||
'uploader_id': 'user28849593',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _verify_video_password(self, url, video_id, webpage):
|
||||
@@ -244,7 +256,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
# We try to find out to which variable is assigned the config dic
|
||||
m_variable_name = re.search('(\w)\.video\.id', webpage)
|
||||
if m_variable_name is not None:
|
||||
config_re = r'%s=({.+?});' % re.escape(m_variable_name.group(1))
|
||||
config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))
|
||||
else:
|
||||
config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
|
||||
config = self._search_regex(config_re, webpage, 'info section',
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.10.13'
|
||||
__version__ = '2014.10.18'
|
||||
|
||||
Reference in New Issue
Block a user