mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-12-22 07:56:49 +00:00
Update PR with back-port from its development in yt-dlp
This commit is contained in:
parent
a0f69f9526
commit
ddbadd037f
@ -569,7 +569,6 @@
|
||||
- **ndr:embed**
|
||||
- **ndr:embed:base**
|
||||
- **NDTV**
|
||||
- **Nebula**
|
||||
- **NerdCubedFeed**
|
||||
- **netease:album**: 网易云音乐 - 专辑
|
||||
- **netease:djradio**: 网易云音乐 - 电台
|
||||
|
@ -731,7 +731,12 @@ from .ndr import (
|
||||
NJoyEmbedIE,
|
||||
)
|
||||
from .ndtv import NDTVIE
|
||||
from .nebula import NebulaIE
|
||||
from .nebula import (
|
||||
NebulaIE,
|
||||
NebulaChannelIE,
|
||||
NebulaClassIE,
|
||||
NebulaSubscriptionsIE,
|
||||
)
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
from .netzkino import NetzkinoIE
|
||||
from .neteasemusic import (
|
||||
|
@ -1,320 +1,573 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import os
|
||||
import itertools
|
||||
|
||||
from .art19 import Art19IE
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote, compat_str
|
||||
from ..utils import parse_iso8601, ExtractorError, try_get, urljoin, sanitized_Request
|
||||
from ..compat import (
|
||||
compat_HTTPError as HTTPError,
|
||||
compat_kwargs,
|
||||
compat_str as str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
json_stringify,
|
||||
# make_archive_id,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
T,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unsmuggle_url,
|
||||
update_url,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
|
||||
|
||||
|
||||
class NebulaIE(InfoExtractor):
|
||||
"""
|
||||
Nebula (https://watchnebula.com/) is a video platform created by the streamer community Standard. It hosts videos
|
||||
off-YouTube from a small hand-picked group of creators.
|
||||
|
||||
All videos require a subscription to watch. There are no known freely available videos. An authentication token to
|
||||
an account with a valid subscription can be specified in multiple ways, including credentials in .netrc or a cookie
|
||||
jar.
|
||||
As neither of these parameters appears to be supported by the unit test runner, it's recommended to set the envvar
|
||||
NEBULA_TOKEN to execute the test runs.
|
||||
|
||||
Nebula uses the Zype video infrastructure and this extractor is using the 'url_transparent' mode to hand off
|
||||
video extraction to the Zype extractor.
|
||||
|
||||
This description has been last updated on 2020-10-22.
|
||||
"""
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?watchnebula\.com/videos/(?P<id>[-\w]+)' # the 'id' group is actually the display_id, but we misname it 'id' to be able to use _match_id()
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://watchnebula.com/videos/that-time-disney-remade-beauty-and-the-beast',
|
||||
'md5': 'fe79c4df8b3aa2fea98a93d027465c7e',
|
||||
'info_dict': {
|
||||
'id': '5c271b40b13fd613090034fd',
|
||||
'ext': 'mp4',
|
||||
'title': 'That Time Disney Remade Beauty and the Beast',
|
||||
'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
|
||||
'upload_date': '20180731',
|
||||
'timestamp': 1533009600,
|
||||
'channel': 'Lindsay Ellis',
|
||||
'uploader': 'Lindsay Ellis',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://watchnebula.com/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
|
||||
'md5': '6d4edd14ce65720fa63aba5c583fb328',
|
||||
'info_dict': {
|
||||
'id': '5e7e78171aaf320001fbd6be',
|
||||
'ext': 'mp4',
|
||||
'title': 'Landing Craft - How The Allies Got Ashore',
|
||||
'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
|
||||
'upload_date': '20200327',
|
||||
'timestamp': 1585348140,
|
||||
'channel': 'The Logistics of D-Day',
|
||||
'uploader': 'The Logistics of D-Day',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
|
||||
'md5': '8c7d272910eea320f6f8e6d3084eecf5',
|
||||
'info_dict': {
|
||||
'id': '5e779ebdd157bc0001d1c75a',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episode 1: The Draw',
|
||||
'description': r'contains:There’s free money on offer… if the players can all work together.',
|
||||
'upload_date': '20200323',
|
||||
'timestamp': 1584980400,
|
||||
'channel': 'Tom Scott Presents: Money',
|
||||
'uploader': 'Tom Scott Presents: Money',
|
||||
}
|
||||
},
|
||||
]
|
||||
_WORKING = True # FIXME: should this be set to False, to hide the tests from CI, given that the unit tests require an auth cookie of a (paid) subscription?
|
||||
class NebulaBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'watchnebula'
|
||||
_token = _api_token = None
|
||||
|
||||
def _perform_login(self, username, password, video_id):
|
||||
"""
|
||||
Log in to Nebula, authenticating using a given username and password.
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
Returns a Nebula token, as the frontend would store it in the
|
||||
nebula-auth cookie. Or False, if authentication fails.
|
||||
"""
|
||||
data = json.dumps({'email': username, 'password': password}).encode('utf8')
|
||||
request = sanitized_Request(method='POST',
|
||||
url='https://api.watchnebula.com/api/v1/auth/login/',
|
||||
data=data,
|
||||
headers={
|
||||
'content-type': 'application/json',
|
||||
# Overwrite the cookie headers, because
|
||||
# submitting the 'sessionid' cookie
|
||||
# always causes a 403 on auth endpoint
|
||||
'cookie': ''})
|
||||
response = self._download_json(request, fatal=False, video_id=video_id,
|
||||
note='Authenticating to Nebula with supplied credentials',
|
||||
errnote='Authentication failed or rejected')
|
||||
if not response or 'key' not in response:
|
||||
return False
|
||||
return response['key']
|
||||
def _login(self):
|
||||
if not self._api_token:
|
||||
self._api_token = try_call(
|
||||
lambda: self._get_cookies('https://nebula.tv')['nebula_auth.apiToken'].value)
|
||||
self._token = self._download_json(
|
||||
'https://users.api.nebula.app/api/v1/authorization/', None,
|
||||
headers={'Authorization': 'Token {0}'.format(self._api_token)} if self._api_token else {},
|
||||
note='Authorizing to Nebula', data=b'')['token']
|
||||
if self._token:
|
||||
return
|
||||
|
||||
def _retrieve_nebula_auth(self, video_id):
|
||||
"""
|
||||
Attempt to find a Nebula API token. Makes multiple attempts in the
|
||||
following order:
|
||||
a) login credentials used to authenticate to the Nebula login endpoint,
|
||||
either from .netrc or specified using --username/--password
|
||||
b) the --cookies supplied cookie jar
|
||||
c) the NEBULA_TOKEN environment variable
|
||||
d) the --video-password command line argument (this isn't documented in
|
||||
the error message, because probably highly unpopular)
|
||||
If none of these are successful, an end user-intended error message is
|
||||
raised, listing some solutions.
|
||||
|
||||
Returns a Nebula API token, which subsequently can be used to make
|
||||
authenticated calls to the Nebula API.
|
||||
"""
|
||||
nebula_token = None
|
||||
|
||||
# option #1: login credentials via .netrc or --username and --password
|
||||
username, password = self._get_login_info()
|
||||
if username and password:
|
||||
self.to_screen('Authenticating to Nebula using .netrc or command line-supplied credentials')
|
||||
nebula_token = self._perform_login(username, password, video_id)
|
||||
if username is None:
|
||||
return
|
||||
self._perform_login(username, password)
|
||||
|
||||
# option #2: nebula token via cookie jar
|
||||
if not nebula_token:
|
||||
# TODO: is there a helper to do all this cookie extraction?
|
||||
nebula_cookies = self._get_cookies('https://watchnebula.com')
|
||||
nebula_cookie = nebula_cookies.get('nebula-auth')
|
||||
if nebula_cookie:
|
||||
self.to_screen('Authenticating to Nebula with credentials from cookie jar')
|
||||
nebula_cookie_value = compat_urllib_parse_unquote(nebula_cookie.value)
|
||||
nebula_token = self._parse_json(nebula_cookie_value, video_id).get('apiToken')
|
||||
def _perform_login(self, username, password):
    """
    Log in to Nebula with an e-mail address and a password.

    The credentials are POSTed as JSON to the Nebula login endpoint and the
    'key' field of the JSON response is stored in self._api_token.

    Raises ExtractorError when the request fails (an HTTP 400 is reported
    as an expected "invalid username or password" error) or when the
    response carries no usable 'key'.
    """
    login_headers = {'content-type': 'application/json'}
    try:
        credentials = json_stringify({'email': username, 'password': password})
        response = self._download_json(
            'https://nebula.tv/auth/login/', None,
            'Logging in to Nebula', 'Login failed',
            data=credentials, headers=login_headers)
    except ExtractorError as e:
        bad_credentials = isinstance(e.cause, HTTPError) and e.cause.status == 400
        if not bad_credentials:
            # anything other than a 400 is passed on unchanged
            raise
        raise ExtractorError('Login failed: Invalid username or password', expected=True)

    api_token = traverse_obj(response, ('key', T(str)))
    # the attribute is updated even when no token was found
    self._api_token = api_token
    if not api_token:
        raise ExtractorError('Login failed: No token')
|
||||
|
||||
# option #3: nebula token via environment variable
|
||||
if not nebula_token and 'NEBULA_TOKEN' in os.environ:
|
||||
nebula_token = os.environ.get('NEBULA_TOKEN')
|
||||
if nebula_token:
|
||||
self.to_screen('Authenticating to Nebula with token from NEBULA_TOKEN environment variable')
|
||||
def _call_api(self, *args, **kwargs):
|
||||
|
||||
# option #4: nebula token via --videopassword
|
||||
if not nebula_token:
|
||||
nebula_token = self._downloader.params.get('videopassword')
|
||||
if nebula_token:
|
||||
self.to_screen('Authenticating to Nebula with token from --videopassword')
|
||||
def kwargs_set_token(kw):
|
||||
kw.setdefault('headers', {})['Authorization'] = 'Bearer {0}'.format(self._token)
|
||||
return compat_kwargs(kw)
|
||||
|
||||
if not nebula_token:
|
||||
raise ExtractorError('Nebula requires an account with an active subscription. '
|
||||
'You can supply your authentication information by either '
|
||||
'a) storing your credentials in .netrc or supplying them via --username and --password, or '
|
||||
'b) passing in a cookie jar containing a nebula-auth cookie via --cookies, or '
|
||||
'c) setting the environment variable NEBULA_TOKEN.')
|
||||
return nebula_token
|
||||
if self._token:
|
||||
kwargs = kwargs_set_token(kwargs)
|
||||
try:
|
||||
return self._download_json(*args, **kwargs)
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, HTTPError) or e.cause.status not in (401, 403):
|
||||
raise
|
||||
self.to_screen(
|
||||
'Reauthorizing with Nebula and retrying, because last API '
|
||||
'call resulted in error {0}'.format(e.cause.status))
|
||||
self._real_initialize()
|
||||
if self._token:
|
||||
kwargs = kwargs_set_token(kwargs)
|
||||
return self._download_json(*args, **kwargs)
|
||||
|
||||
def _retrieve_zype_api_key(self, page_url, display_id):
|
||||
"""
|
||||
Retrieves the Zype API key required to make calls to the Zype API.
|
||||
def _extract_formats(self, content_id, slug):
    """
    Fetch the HLS formats of one piece of Nebula content.

    content_id: the API content id; the text before its first ':' (plus
        an 's') is used as the path segment of the manifest URL
        (presumably ids look like '<type>:<uuid>' -- TODO confirm)
    slug: the id passed to the download helper for its messages

    Returns a dict with the sorted 'formats' and an (always empty)
    'subtitles' dict.

    An HTTP 401 leads to raise_login_required(); the first HTTP 403
    triggers one re-initialisation and a single retry. Every other
    ExtractorError is passed on unchanged.
    """
    manifest_url = 'https://content.api.nebula.app/{0}s/{1}/manifest.m3u8'.format(
        content_id.split(':', 1)[0], content_id)

    for is_retry in (False, True):
        try:
            # only formats are fetched here (the commented-out alternative was
            # _extract_m3u8_formats_and_subtitles), so subtitles stay empty;
            # self._token is read on every attempt because a retry refreshes it
            formats = self._extract_m3u8_formats(
                manifest_url, slug, 'mp4', query={
                    'token': self._token,
                    'app_version': '23.10.0',
                    'platform': 'ios',
                })
            self._sort_formats(formats)
            return {'formats': formats, 'subtitles': {}}
        except ExtractorError as e:
            cause = e.cause
            if not isinstance(cause, HTTPError):
                raise
            status = cause.status
            if status == 401:
                self.raise_login_required()
            if is_retry or status != 403:
                raise
            self.to_screen('Reauthorizing with Nebula and retrying, because fetching video resulted in error')
            self._real_initialize()
|
||||
|
||||
Unfortunately, the Nebula frontend stores this as a JS object literal in one of its JS chunks,
|
||||
looking somewhat like this (but minified):
|
||||
def _extract_video_metadata(self, episode):
    """
    Build the info-dict metadata for one episode object of the Nebula API.

    episode: a dict as returned by the content API; 'id' and 'title' are
        required (a KeyError is raised when either is missing), all other
        fields are optional.

    Returns a dict in which 'id' is whatever follows the first ':' of
    episode['id'], and 'channel_url'/'uploader_url' are built from
    'channel_slug' or, failing that, 'class_slug', joined onto
    https://nebula.tv/.
    """
    channel_url = traverse_obj(
        episode, (('channel_slug', 'class_slug'), T(lambda u: urljoin('https://nebula.tv/', u))), get_all=False)
    return merge_dicts({
        'id': episode['id'].partition(':')[2],
        'title': episode['title'],
        'channel_url': channel_url,
        'uploader_url': channel_url,
    }, traverse_obj(episode, {
        'display_id': 'slug',
        'description': 'description',
        'timestamp': ('published_at', T(parse_iso8601)),
        'duration': ('duration', T(int_or_none)),
        'channel_id': 'channel_slug',
        'uploader_id': 'channel_slug',
        'channel': 'channel_title',
        'uploader': 'channel_title',
        'series': 'channel_title',
        'creator': 'channel_title',
        'thumbnail': ('images', 'thumbnail', 'src', T(url_or_none)),
        # T(...) rather than a {...} set literal: consistent with every other
        # transform in this module and parseable without set-display syntax
        'episode_number': ('order', T(int_or_none)),
        # Old code was wrongly setting extractor_key from NebulaSubscriptionsIE
        # '_old_archive_ids': ('zype_id', {lambda x: [
        #     make_archive_id(NebulaIE, x), make_archive_id(NebulaSubscriptionsIE, x)] if x else None}),
    }))
|
||||
|
||||
return {
|
||||
NODE_ENV: "production",
|
||||
REACT_APP_NAME: "Nebula",
|
||||
REACT_APP_NEBULA_API: "https://api.watchnebula.com/api/v1/",
|
||||
REACT_APP_ZYPE_API: "https://api.zype.com/",
|
||||
REACT_APP_ZYPE_API_KEY: "<redacted>",
|
||||
REACT_APP_ZYPE_APP_KEY: "<redacted>",
|
||||
// ...
|
||||
}
|
||||
|
||||
So we have to find the reference to the chunk in the video page (as it is hashed and the hash will
|
||||
change when they do a new release), then download the chunk and extract the API key from there,
|
||||
hoping they won't rename the constant.
|
||||
|
||||
Alternatively, it is currently hardcoded and shared among all users. We haven't seen it
|
||||
change so far, so we could also just hardcode it in the extractor as a fallback.
|
||||
"""
|
||||
# fetch the video page
|
||||
webpage = self._download_webpage(page_url, video_id=display_id)
|
||||
|
||||
# find the script tag with a file named 'main.<hash>.chunk.js' in there
|
||||
main_script_relpath = self._search_regex(
|
||||
r'<script[^>]*src="(?P<script_relpath>[^"]*main.[0-9a-f]*.chunk.js)"[^>]*>', webpage,
|
||||
group='script_relpath', name='script relative path', fatal=True)
|
||||
|
||||
# fetch the JS chunk
|
||||
main_script_abspath = urljoin(page_url, main_script_relpath)
|
||||
main_script = self._download_webpage(main_script_abspath, video_id=display_id,
|
||||
note='Retrieving Zype API key')
|
||||
|
||||
# find the API key named 'REACT_APP_ZYPE_API_KEY' in there
|
||||
api_key = self._search_regex(
|
||||
r'REACT_APP_ZYPE_API_KEY\s*:\s*"(?P<api_key>[\w-]*)"', main_script,
|
||||
group='api_key', name='API key', fatal=True)
|
||||
|
||||
return api_key
|
||||
|
||||
def _call_zype_api(self, path, params, video_id, api_key, note):
|
||||
"""
|
||||
A helper for making calls to the Zype API.
|
||||
"""
|
||||
query = {'api_key': api_key, 'per_page': 1}
|
||||
query.update(params)
|
||||
return self._download_json('https://api.zype.com' + path, video_id, query=query, note=note)
|
||||
|
||||
def _fetch_zype_video_data(self, display_id, api_key):
|
||||
"""
|
||||
Fetch video meta data from the Zype API.
|
||||
"""
|
||||
response = self._call_zype_api('/videos', {'friendly_title': display_id},
|
||||
display_id, api_key, note='Retrieving metadata from Zype')
|
||||
if 'response' not in response or len(response['response']) != 1:
|
||||
raise ExtractorError('Unable to find video on Zype API')
|
||||
return response['response'][0]
|
||||
|
||||
def _call_nebula_api(self, path, video_id, access_token, note):
|
||||
"""
|
||||
A helper for making calls to the Nebula API.
|
||||
"""
|
||||
return self._download_json('https://api.watchnebula.com/api/v1' + path, video_id, headers={
|
||||
'Authorization': 'Token {access_token}'.format(access_token=access_token)
|
||||
}, note=note)
|
||||
|
||||
def _fetch_zype_access_token(self, video_id, nebula_token):
|
||||
"""
|
||||
Requests a Zype access token from the Nebula API.
|
||||
"""
|
||||
user_object = self._call_nebula_api('/auth/user/', video_id, nebula_token, note='Retrieving Zype access token')
|
||||
access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], compat_str)
|
||||
if not access_token:
|
||||
if try_get(user_object, lambda x: x['is_subscribed'], bool):
|
||||
raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint, please try loading an arbitrary video in a browser with this account to ''prime'' it for video downloading')
|
||||
raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
|
||||
return access_token
|
||||
|
||||
def _build_video_url(self, video_id, zype_access_token):
|
||||
"""
|
||||
Construct a Zype video URL (as supported by the Zype extractor), given a Zype video ID and a Zype access token.
|
||||
"""
|
||||
return 'https://player.zype.com/embed/{video_id}.html?access_token={access_token}'.format(
|
||||
video_id=video_id,
|
||||
access_token=zype_access_token)
|
||||
|
||||
def _extract_channel(self, video_meta):
|
||||
"""
|
||||
Extract the channel title, by going through the list of categories and finding the first value of the
|
||||
first category that has a value.
|
||||
|
||||
I know this looks like a terrible approach. But actually, it's just reproducing the behavior of the
|
||||
React code the Nebula frontend uses (as of 2020-04-07):
|
||||
|
||||
let channel;
|
||||
if (video && video.categories && video.categories.length) {
|
||||
const channelTitle = video.categories.map((category) => (category.value[0]))
|
||||
.filter((title) => (!!title))[0];
|
||||
channel = getChannelByTitle(state, { title: channelTitle });
|
||||
}
|
||||
|
||||
Basically, it finds the first (truthy) value in the category list and that's assumed to be the
|
||||
channel title. And then the channel details (e.g. the URL) are looked up by title (!) (not by any
|
||||
kind of ID) via an additional API call.
|
||||
|
||||
TODO: Implement the API calls giving us the channel list, so that we can do the title lookup and then figure out the channel URL
|
||||
|
||||
May return None if no category list could be found or no category had a label ('value').
|
||||
"""
|
||||
categories = video_meta.get('categories', []) if video_meta else []
|
||||
for category in categories:
|
||||
if category.get('value'): # we're intentionally not using "'value' in category" here, because the expression is supposed to be falsy for empty lists in category['value'] as well!
|
||||
return category['value'][0]
|
||||
class NebulaIE(NebulaBaseIE):
|
||||
IE_NAME = 'nebula:video'
|
||||
_VALID_URL = r'{0}/videos/(?P<id>[\w-]+)'.format(_BASE_URL_RE)
|
||||
_TESTS = [{
|
||||
'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
|
||||
'info_dict': {
|
||||
'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
|
||||
'ext': 'mp4',
|
||||
'title': 'That Time Disney Remade Beauty and the Beast',
|
||||
'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
|
||||
'upload_date': '20180731',
|
||||
'timestamp': 1533009600,
|
||||
'channel': 'Lindsay Ellis',
|
||||
'channel_id': 'lindsayellis',
|
||||
'uploader': 'Lindsay Ellis',
|
||||
'uploader_id': 'lindsayellis',
|
||||
'uploader_url': r're:https://nebula\.(tv|app)/lindsayellis',
|
||||
'series': 'Lindsay Ellis',
|
||||
'display_id': 'that-time-disney-remade-beauty-and-the-beast',
|
||||
'channel_url': r're:https://nebula\.(tv|app)/lindsayellis',
|
||||
'creator': 'Lindsay Ellis',
|
||||
'duration': 2212,
|
||||
'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$',
|
||||
# '_old_archive_ids': ['nebula 5c271b40b13fd613090034fd', 'nebulasubscriptions 5c271b40b13fd613090034fd'],
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
|
||||
'md5': 'd05739cf6c38c09322422f696b569c23',
|
||||
'info_dict': {
|
||||
'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
|
||||
'ext': 'mp4',
|
||||
'title': 'Landing Craft - How The Allies Got Ashore',
|
||||
'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
|
||||
'upload_date': '20200327',
|
||||
'timestamp': 1585348140,
|
||||
'channel': 'Real Engineering — The Logistics of D-Day',
|
||||
'channel_id': 'd-day',
|
||||
'uploader': 'Real Engineering — The Logistics of D-Day',
|
||||
'uploader_id': 'd-day',
|
||||
'series': 'Real Engineering — The Logistics of D-Day',
|
||||
'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
|
||||
'creator': 'Real Engineering — The Logistics of D-Day',
|
||||
'duration': 841,
|
||||
'channel_url': 'https://nebula.tv/d-day',
|
||||
'uploader_url': 'https://nebula.tv/d-day',
|
||||
'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$',
|
||||
# '_old_archive_ids': ['nebula 5e7e78171aaf320001fbd6be', 'nebulasubscriptions 5e7e78171aaf320001fbd6be'],
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'skip': 'Only available for registered users',
|
||||
}, {
|
||||
'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
|
||||
'md5': 'ebe28a7ad822b9ee172387d860487868',
|
||||
'info_dict': {
|
||||
'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episode 1: The Draw',
|
||||
'description': r'contains:There’s free money on offer… if the players can all work together.',
|
||||
'upload_date': '20200323',
|
||||
'timestamp': 1584980400,
|
||||
'channel': 'Tom Scott Presents: Money',
|
||||
'channel_id': 'tom-scott-presents-money',
|
||||
'uploader': 'Tom Scott Presents: Money',
|
||||
'uploader_id': 'tom-scott-presents-money',
|
||||
'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
|
||||
'duration': 825,
|
||||
'channel_url': 'https://nebula.tv/tom-scott-presents-money',
|
||||
'series': 'Tom Scott Presents: Money',
|
||||
'display_id': 'money-episode-1-the-draw',
|
||||
'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$',
|
||||
# '_old_archive_ids': ['nebula 5e779ebdd157bc0001d1c75a', 'nebulasubscriptions 5e779ebdd157bc0001d1c75a'],
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'skip': 'Only available for registered users',
|
||||
}, {
|
||||
'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://nebula.tv/videos/tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
|
||||
'info_dict': {
|
||||
'id': 'e389af9d-1dab-44f2-8788-ee24deb7ff0d',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
|
||||
'title': 'Did the US Really Blow Up the NordStream Pipelines?',
|
||||
'description': 'md5:b4e2a14e3ff08f546a3209c75261e789',
|
||||
'upload_date': '20230223',
|
||||
'timestamp': 1677144070,
|
||||
'channel': 'TLDR News EU',
|
||||
'channel_id': 'tldrnewseu',
|
||||
'uploader': 'TLDR News EU',
|
||||
'uploader_id': 'tldrnewseu',
|
||||
'uploader_url': r're:https://nebula\.(tv|app)/tldrnewseu',
|
||||
'duration': 524,
|
||||
'channel_url': r're:https://nebula\.(tv|app)/tldrnewseu',
|
||||
'series': 'TLDR News EU',
|
||||
'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$',
|
||||
'creator': 'TLDR News EU',
|
||||
# '_old_archive_ids': ['nebula 63f64c74366fcd00017c1513', 'nebulasubscriptions 63f64c74366fcd00017c1513'],
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
# extract the video's display ID from the URL (we'll retrieve the video ID later)
|
||||
display_id = self._match_id(url)
|
||||
slug = self._match_id(url)
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
if smuggled_data.get('id'):
|
||||
return merge_dicts({
|
||||
'id': smuggled_data['id'],
|
||||
'display_id': slug,
|
||||
'title': '',
|
||||
}, self._extract_formats(smuggled_data['id'], slug))
|
||||
|
||||
# retrieve Nebula authentication information
|
||||
nebula_token = self._retrieve_nebula_auth(display_id)
|
||||
metadata = self._call_api(
|
||||
'https://content.api.nebula.app/content/videos/{0}'.format(slug),
|
||||
slug, note='Fetching video metadata')
|
||||
return merge_dicts(
|
||||
self._extract_video_metadata(metadata),
|
||||
self._extract_formats(metadata['id'], slug),
|
||||
rev=True
|
||||
)
|
||||
|
||||
# fetch video meta data from the Nebula API
|
||||
api_key = self._retrieve_zype_api_key(url, display_id)
|
||||
video_meta = self._fetch_zype_video_data(display_id, api_key)
|
||||
video_id = video_meta['_id']
|
||||
|
||||
# extract additional info
|
||||
channel_title = self._extract_channel(video_meta)
|
||||
class NebulaClassIE(NebulaBaseIE):
|
||||
IE_NAME = 'nebula:media'
|
||||
_VALID_URL = r'{0}/(?!(?:myshows|library|videos)/)(?P<id>[\w-]+)/(?P<ep>[\w-]+)/?(?:$|[?#])'.format(_BASE_URL_RE)
|
||||
_TESTS = [{
|
||||
'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
|
||||
'info_dict': {
|
||||
'id': 'd7432cdc-c608-474d-942c-f74345daed7b',
|
||||
'ext': 'mp4',
|
||||
'display_id': '14',
|
||||
'channel_url': 'https://nebula.tv/copyright-for-fun-and-profit',
|
||||
'episode_number': 14,
|
||||
'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$',
|
||||
'uploader_url': 'https://nebula.tv/copyright-for-fun-and-profit',
|
||||
'duration': 646,
|
||||
'episode': 'Episode 14',
|
||||
'title': 'Photos, Sculpture, and Video',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'skip': 'Only available for registered users',
|
||||
}, {
|
||||
'add_ies': [Art19IE],
|
||||
'url': 'https://nebula.tv/extremitiespodcast/pyramiden-the-high-arctic-soviet-ghost-town',
|
||||
'info_dict': {
|
||||
'ext': 'mp3',
|
||||
'id': '83ef3b53-049e-4211-b34e-7bb518e67d64',
|
||||
'description': r"re:(?s)20 years ago, what was previously the Soviet Union's .{467}#do-not-sell-my-info\.$",
|
||||
'series_id': 'e0223cfc-f39c-4ad4-8724-bd8731bd31b5',
|
||||
'modified_timestamp': 1629410982,
|
||||
'episode_id': '83ef3b53-049e-4211-b34e-7bb518e67d64',
|
||||
'series': 'Extremities',
|
||||
# 'modified_date': '20200903',
|
||||
'upload_date': '20200902',
|
||||
'title': 'Pyramiden: The High-Arctic Soviet Ghost Town',
|
||||
'release_timestamp': 1571237958,
|
||||
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
|
||||
'duration': 1546.05714,
|
||||
'timestamp': 1599085555,
|
||||
'release_date': '20191016',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://nebula.tv/thelayover/the-layover-episode-1',
|
||||
'info_dict': {
|
||||
'ext': 'mp3',
|
||||
'id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
|
||||
'episode_number': 1,
|
||||
'thumbnail': r're:https?://images\.nebula\.tv/[a-f\d-]+$',
|
||||
'release_date': '20230304',
|
||||
'modified_date': '20230403',
|
||||
'series': 'The Layover',
|
||||
'episode_id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
|
||||
'modified_timestamp': 1680554566,
|
||||
'duration': 3130.46401,
|
||||
'release_timestamp': 1677943800,
|
||||
'title': 'The Layover — Episode 1',
|
||||
'series_id': '874303a5-4900-4626-a4b6-2aacac34466a',
|
||||
'upload_date': '20230303',
|
||||
'episode': 'Episode 1',
|
||||
'timestamp': 1677883672,
|
||||
'description': 'md5:002cca89258e3bc7c268d5b8c24ba482',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'skip': 'Only available for registered users',
|
||||
}]
|
||||
|
||||
# fetch the access token for Zype, then construct the video URL
|
||||
zype_access_token = self._fetch_zype_access_token(display_id, nebula_token=nebula_token)
|
||||
video_url = self._build_video_url(video_id, zype_access_token)
|
||||
def _real_extract(self, url):
|
||||
slug, episode = self._match_valid_url(url).group('id', 'ep')
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
if smuggled_data.get('id'):
|
||||
return merge_dicts({
|
||||
'id': smuggled_data['id'],
|
||||
'display_id': slug,
|
||||
'title': '',
|
||||
}, self._extract_formats(smuggled_data['id'], slug))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
metadata = self._call_api(
|
||||
'https://content.api.nebula.app/content/{0}/{1}/?include=lessons'.format(
|
||||
slug, episode),
|
||||
slug, note='Fetching class/podcast metadata')
|
||||
content_type = traverse_obj(metadata, 'type')
|
||||
if content_type == 'lesson':
|
||||
return merge_dicts(
|
||||
self._extract_video_metadata(metadata),
|
||||
self._extract_formats(metadata['id'], slug))
|
||||
elif content_type == 'podcast_episode':
|
||||
episode_url = metadata.get('episode_url')
|
||||
if not episode_url and metadata.get('premium'):
|
||||
self.raise_login_required()
|
||||
|
||||
# we're passing this video URL on to the 'Zype' extractor (that's the video infrastructure that Nebula is
|
||||
# built on top of) and use the 'url_transparent' type to indicate that our meta data should be better than
|
||||
# whatever the Zype extractor is able to identify
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Zype',
|
||||
'url': video_url,
|
||||
if Art19IE.suitable(episode_url):
|
||||
return self.url_result(episode_url, Art19IE.ie_key())
|
||||
return merge_dicts({
|
||||
'id': metadata['id'],
|
||||
'title': metadata['title'],
|
||||
}, traverse_obj(metadata, {
|
||||
'url': ('episode_url', T(url_or_none)),
|
||||
'description': ('description', T(str_or_none)),
|
||||
'timestamp': ('published_at', T(parse_iso8601)),
|
||||
'duration': ('duration', T(int_or_none)),
|
||||
'channel_id': ('channel_id', T(str_or_none)),
|
||||
'channel': ('channel_title', T(str_or_none)),
|
||||
'thumbnail': ('assets', 'regular', T(url_or_none)),
|
||||
}))
|
||||
|
||||
# the meta data we were able to extract from Nebula
|
||||
'title': video_meta.get('title'),
|
||||
'description': video_meta.get('description'),
|
||||
'timestamp': parse_iso8601(video_meta.get('published_at')),
|
||||
'thumbnails': [
|
||||
{
|
||||
'id': tn.get('name'), # this appears to be null in all cases I've encountered
|
||||
'url': tn['url'],
|
||||
'width': tn.get('width'),
|
||||
'height': tn.get('height'),
|
||||
} for tn in video_meta.get('thumbnails', [])],
|
||||
'duration': video_meta.get('duration'),
|
||||
'channel': channel_title,
|
||||
'uploader': channel_title, # we chose here to declare the channel name as the 'uploader' -- that's certainly arguable, as sometimes it's more of a series
|
||||
# TODO: uploader_url: the video page clearly links to this (in the example case: /lindsayellis), but I cannot figure out where it gets it from!
|
||||
# TODO: channel_id
|
||||
# TODO: channel_url
|
||||
}
|
||||
raise ExtractorError('Unexpected content type {0!r}'.format(content_type))
|
||||
|
||||
|
||||
class NebulaPlaylistBaseIE(NebulaBaseIE):
    """Shared helpers for Nebula extractors that page through episode lists."""

    # root of the content API and the query applied to every listing request
    _BASE_API_URL = 'https://content.api.nebula.app/'
    _API_QUERY = {'ordering': '-published_at'}

    @classmethod
    def _get_api_url(cls, item_id, path='/video_episodes/'):
        """Return the listing URL: the base API URL with `path` and the class query.

        `item_id` is not used by this implementation.
        """
        listing_query = cls._API_QUERY
        return update_url(cls._BASE_API_URL, path=path, query_update=listing_query)

    @staticmethod
    def _get_episode_url(episode, episode_id):
        """Return the nebula.tv watch URL for `episode_id` (`episode` is unused here)."""
        return 'https://nebula.tv/videos/{0}'.format(episode_id)

    @classmethod
    def url_result(cls, url, *args, **kwargs):
        """
        Wrap the parent url_result() with some Nebula-specific conveniences.

        Keyword arguments consumed here:
        url_transparent:  if true, the result's '_type' is 'url_transparent'
        smuggled_data:    if given, smuggled into `url`

        When no ie_key is supplied (neither as the first positional argument
        nor as the `ie_key` keyword), NebulaIE's key is prepended to the
        positional arguments. The remaining keyword arguments are handed to
        merge_dicts() as keywords.
        """
        make_transparent = kwargs.pop('url_transparent', False)
        smuggled = kwargs.pop('smuggled_data', None)
        if smuggled:
            url = smuggle_url(url, smuggled)

        requested_ie_key = args[0] if args else kwargs.get('ie_key')
        if not requested_ie_key:
            args = (NebulaIE.ie_key(),) + args

        type_override = {'_type': 'url_transparent'} if make_transparent else {}
        base_result = super(NebulaPlaylistBaseIE, cls).url_result(url, *args)
        return merge_dicts(type_override, base_result, **kwargs)

    def _generate_playlist_entries(self, pl_id=None, slug=None, dl_note=None):
        """
        Yield one url_result() entry per episode, following the API's 'next' links.

        pl_id:    passed to _get_api_url() and used as the slug fallback
        slug:     id shown in download messages (defaults to pl_id)
        dl_note:  text inserted before 'page N' in the progress note; when
                  None it is derived from the part of IE_NAME after the last
                  ':' (empty if IE_NAME has no such prefix/suffix pair)
        """
        next_url = self._get_api_url(pl_id)
        if dl_note is None:
            prefix, _, suffix = self.IE_NAME.rpartition(':')
            dl_note = '{0} '.format(suffix) if prefix and suffix else ''
        playlist_slug = slug or pl_id

        for page_num in itertools.count(1):
            progress_note = 'Retrieving {0}page {1}'.format(dl_note, page_num)
            page = self._call_api(next_url, playlist_slug, note=progress_note)
            for episode in traverse_obj(page, ('results', Ellipsis)):
                metadata = self._extract_video_metadata(episode)
                episode_url = self._get_episode_url(episode, metadata['display_id'])
                # the API id is smuggled along with the episode URL
                yield self.url_result(
                    episode_url,
                    smuggled_data={'id': episode['id']}, url_transparent=True,
                    **metadata)
            next_url = page.get('next')
            if not next_url:
                # no further page advertised: the listing is complete
                return
|
||||
|
||||
|
||||
class NebulaSubscriptionsIE(NebulaPlaylistBaseIE):
    """Playlist extractor for the `/myshows` page.

    The listing is requested with `following=true`; an HTTP 400 response is
    reported to the user as a login requirement.
    """

    IE_NAME = 'nebula:subscriptions'
    _VALID_URL = r'{0}/myshows'.format(_BASE_URL_RE)
    _API_QUERY = {
        'following': 'true',
        'include': 'engagement',
        'ordering': '-published_at',
    }
    _TESTS = [{
        'url': 'https://nebula.tv/myshows',
        'playlist_mincount': 1,
        'info_dict': {
            'id': 'myshows',
        },
        'skip': 'You must be logged in to find your subscriptions',
    }]

    def _call_api(self, *args, **kwargs):
        """Call the inherited `_call_api()`, mapping HTTP 400 to a login error.

        Any other ExtractorError is re-raised unchanged.
        """
        try:
            return super(NebulaSubscriptionsIE, self)._call_api(*args, **kwargs)
        except ExtractorError as e:
            cause = e.cause
            if isinstance(cause, HTTPError):
                # `code` is the attribute documented for urllib's HTTPError;
                # `status` does not exist on every supported Python version,
                # so it is only consulted as a fallback
                status = getattr(cause, 'code', None)
                if status is None:
                    status = getattr(cause, 'status', None)
                if status == 400:
                    self.raise_login_required('You must be logged in to find your subscriptions')
            raise

    def _real_extract(self, url):
        """Return the subscriptions playlist, using the URL basename as its id."""
        slug = url_basename(url)
        return self.playlist_result(self._generate_playlist_entries(slug), slug)
|
||||
|
||||
|
||||
class NebulaChannelIE(NebulaPlaylistBaseIE):
    """Playlist extractor for a Nebula channel, class or podcast page.

    The content API reports a `type` for the collection: 'class' entries come
    from the embedded lessons, 'podcast_channel' entries from the podcast
    episodes listing, and anything else from the video episodes listing.
    """

    IE_NAME = 'nebula:channel'
    _VALID_URL = r'{0}/(?!myshows|library|videos)(?P<id>[\w-]+)/?(?:$|[?#])'.format(_BASE_URL_RE)
    _TESTS = [{
        'url': 'https://nebula.tv/tom-scott-presents-money',
        'info_dict': {
            'id': 'tom-scott-presents-money',
            'title': 'Tom Scott Presents: Money',
            'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
        },
        'playlist_count': 5,
    }, {
        'url': 'https://nebula.tv/lindsayellis',
        'info_dict': {
            'id': 'lindsayellis',
            'title': 'Lindsay Ellis',
            'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://nebula.tv/johnnyharris',
        'info_dict': {
            'id': 'johnnyharris',
            'title': 'Johnny Harris',
            'description': 'I make videos about maps and many other things.',
        },
        'playlist_mincount': 90,
    }, {
        'url': 'https://nebula.tv/copyright-for-fun-and-profit',
        'info_dict': {
            'id': 'copyright-for-fun-and-profit',
            'title': 'Copyright for Fun and Profit',
            'description': 'md5:6690248223eed044a9f11cd5a24f9742',
        },
        'playlist_count': 23,
    }, {
        'url': 'https://nebula.tv/trussissuespodcast',
        'info_dict': {
            'id': 'trussissuespodcast',
            'title': 'Bite the Ballot',
            'description': 'md5:a08c4483bc0b705881d3e0199e721385',
        },
        'playlist_mincount': 80,
    }]

    @classmethod
    def _get_api_url(cls, item_id, path='/video_channels/{0}/video_episodes/'):
        """Return the first listing page URL, with `item_id` formatted into `path`."""
        return super(NebulaChannelIE, cls)._get_api_url(
            item_id, path=path.format(item_id))

    @classmethod
    def _get_episode_url(cls, episode, episode_id):
        """Return the episode's `share_url`, or else the inherited default URL."""
        return (
            episode.get('share_url')
            or super(NebulaChannelIE, cls)._get_episode_url(episode, episode_id))

    def _generate_class_entries(self, channel):
        """Yield a url_transparent result for each lesson embedded in `channel`."""
        for lesson in traverse_obj(channel, ('lessons', Ellipsis)):
            metadata = self._extract_video_metadata(lesson)
            yield self.url_result(
                lesson.get('share_url') or 'https://nebula.tv/{0}/{1}'.format(
                    metadata['class_slug'], metadata['slug']),
                smuggled_data={'id': lesson['id']}, url_transparent=True,
                **metadata)

    def _generate_podcast_entries(self, collection_id, collection_slug):
        """Yield a url result for each podcast episode that has a valid `share_url`."""
        next_url = 'https://content.api.nebula.app/podcast_channels/{0}/podcast_episodes/?ordering=-published_at&premium=true'.format(
            collection_id)
        for page_num in itertools.count(1):
            episodes = self._call_api(next_url, collection_slug, note='Retrieving podcast page {0}'.format(page_num))

            for episode in traverse_obj(episodes, ('results', lambda _, v: url_or_none(v['share_url']))):
                # Pass the extractor's key string, as url_result() itself does
                # for its NebulaIE default, rather than the class object
                yield self.url_result(episode['share_url'], NebulaClassIE.ie_key())
            # Tolerate a non-dict response here, as the results lookup above does
            next_url = traverse_obj(episodes, 'next')
            if not next_url:
                break

    def _real_extract(self, url):
        """Fetch the collection for the URL's slug and return it as a playlist."""
        collection_slug = self._match_id(url)
        channel = self._call_api(
            'https://content.api.nebula.app/content/{0}/?include=lessons'.format(
                collection_slug),
            collection_slug, note='Retrieving channel')

        # The collection type decides which listing supplies the entries
        channel_type = traverse_obj(channel, 'type')
        if channel_type == 'class':
            entries = self._generate_class_entries(channel)
        elif channel_type == 'podcast_channel':
            entries = self._generate_podcast_entries(channel['id'], collection_slug)
        else:
            entries = self._generate_playlist_entries(channel['id'], collection_slug)

        return self.playlist_result(
            entries,
            playlist_id=collection_slug,
            playlist_title=channel.get('title'),
            playlist_description=channel.get('description'))
|
||||
|
Loading…
Reference in New Issue
Block a user