1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2025-01-10 19:48:57 +00:00

[teamcoco] Fix extraction for full episodes(closes #16573)

This commit is contained in:
Remita Amine 2018-05-30 13:21:07 +01:00
parent a07879d6b2
commit e0d42dd4b2
3 changed files with 122 additions and 88 deletions

View File

@ -4,6 +4,10 @@ from __future__ import unicode_literals
import re import re
from .turner import TurnerBaseIE from .turner import TurnerBaseIE
from ..compat import (
compat_urllib_parse_urlparse,
compat_parse_qs,
)
from ..utils import ( from ..utils import (
float_or_none, float_or_none,
int_or_none, int_or_none,
@ -38,48 +42,22 @@ class TBSIE(TurnerBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
site, display_id = re.match(self._VALID_URL, url).groups() site, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_data = self._parse_json(self._search_regex( drupal_settings = self._parse_json(self._search_regex(
r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>', r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
webpage, 'drupal setting'), display_id)['turner_playlist'][0] webpage, 'drupal setting'), display_id)
video_data = drupal_settings['turner_playlist'][0]
media_id = video_data['mediaID'] media_id = video_data['mediaID']
title = video_data['title'] title = video_data['title']
tokenizer_query = compat_parse_qs(compat_urllib_parse_urlparse(
drupal_settings['ngtv_token_url']).query)
streams_data = self._download_json( info = self._extract_ngtv_info(
'http://medium.ngtv.io/media/%s/tv' % media_id, media_id, tokenizer_query, {
media_id)['media']['tv'] 'url': url,
duration = None 'site_name': site[:3].upper(),
chapters = [] 'auth_required': video_data.get('authRequired') == '1',
formats = [] })
for supported_type in ('unprotected', 'bulkaes'):
stream_data = streams_data.get(supported_type, {})
m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
if not m3u8_url:
continue
if stream_data.get('playlistProtection') == 'spe':
m3u8_url = self._add_akamai_spe_token(
'http://token.vgtf.net/token/token_spe',
m3u8_url, media_id, {
'url': url,
'site_name': site[:3].upper(),
'auth_required': video_data.get('authRequired') == '1',
})
formats.extend(self._extract_m3u8_formats(
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
duration = float_or_none(stream_data.get('totalRuntime') or video_data.get('duration'))
if not chapters:
for chapter in stream_data.get('contentSegments', []):
start_time = float_or_none(chapter.get('start'))
duration = float_or_none(chapter.get('duration'))
if start_time is None or duration is None:
continue
chapters.append({
'start_time': start_time,
'end_time': start_time + duration,
})
self._sort_formats(formats)
thumbnails = [] thumbnails = []
for image_id, image in video_data.get('images', {}).items(): for image_id, image in video_data.get('images', {}).items():
@ -98,15 +76,14 @@ class TBSIE(TurnerBaseIE):
}) })
thumbnails.append(i) thumbnails.append(i)
return { info.update({
'id': media_id, 'id': media_id,
'title': title, 'title': title,
'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')), 'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')),
'duration': duration, 'duration': float_or_none(video_data.get('duration')) or info.get('duration'),
'timestamp': int_or_none(video_data.get('created')), 'timestamp': int_or_none(video_data.get('created')),
'season_number': int_or_none(video_data.get('season')), 'season_number': int_or_none(video_data.get('season')),
'episode_number': int_or_none(video_data.get('episode')), 'episode_number': int_or_none(video_data.get('episode')),
'cahpters': chapters,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'formats': formats, })
} return info

View File

@ -3,7 +3,7 @@ from __future__ import unicode_literals
import json import json
from .common import InfoExtractor from .turner import TurnerBaseIE
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
@ -15,7 +15,7 @@ from ..utils import (
) )
class TeamcocoIE(InfoExtractor): class TeamcocoIE(TurnerBaseIE):
_VALID_URL = r'https?://teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)' _VALID_URL = r'https?://teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
_TESTS = [ _TESTS = [
{ {
@ -110,6 +110,8 @@ class TeamcocoIE(InfoExtractor):
name name
} }
duration duration
turnerMediaId
turnerMediaAuthToken
} }
} }
... on NotFoundSlug { ... on NotFoundSlug {
@ -123,53 +125,65 @@ class TeamcocoIE(InfoExtractor):
record = response['record'] record = response['record']
video_id = record['id'] video_id = record['id']
video_sources = self._graphql_call('''{ info = {
%s(id: "%s") {
src
}
}''', 'RecordVideoSource', video_id) or {}
formats = []
get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
for format_id, src in video_sources.get('src', {}).items():
if not isinstance(src, dict):
continue
src_url = src.get('src')
if not src_url:
continue
ext = determine_ext(src_url, mimetype2ext(src.get('type')))
if format_id == 'hls' or ext == 'm3u8':
# compat_urllib_parse.urljoin does not work here
if src_url.startswith('/'):
src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
formats.extend(self._extract_m3u8_formats(
src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
else:
if src_url.startswith('/mp4:protected/'):
# TODO Correct extraction for these files
continue
tbr = int_or_none(self._search_regex(
r'(\d+)k\.mp4', src_url, 'tbr', default=None))
formats.append({
'url': src_url,
'ext': ext,
'tbr': tbr,
'format_id': format_id,
'quality': get_quality(format_id),
})
if not formats:
formats = self._extract_m3u8_formats(
record['file']['url'], video_id, 'mp4', fatal=False)
self._sort_formats(formats)
return {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'formats': formats,
'title': record['title'], 'title': record['title'],
'thumbnail': record.get('thumb', {}).get('preview'), 'thumbnail': record.get('thumb', {}).get('preview'),
'description': record.get('teaser'), 'description': record.get('teaser'),
'duration': parse_duration(record.get('duration')), 'duration': parse_duration(record.get('duration')),
'timestamp': parse_iso8601(record.get('publishOn')), 'timestamp': parse_iso8601(record.get('publishOn')),
} }
media_id = record.get('turnerMediaId')
if media_id:
self._initialize_geo_bypass({
'countries': ['US'],
})
info.update(self._extract_ngtv_info(media_id, {
'accessToken': record['turnerMediaAuthToken'],
'accessTokenType': 'jws',
}))
else:
video_sources = self._graphql_call('''{
%s(id: "%s") {
src
}
}''', 'RecordVideoSource', video_id) or {}
formats = []
get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
for format_id, src in video_sources.get('src', {}).items():
if not isinstance(src, dict):
continue
src_url = src.get('src')
if not src_url:
continue
ext = determine_ext(src_url, mimetype2ext(src.get('type')))
if format_id == 'hls' or ext == 'm3u8':
# compat_urllib_parse.urljoin does not work here
if src_url.startswith('/'):
src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
formats.extend(self._extract_m3u8_formats(
src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
else:
if src_url.startswith('/mp4:protected/'):
# TODO Correct extraction for these files
continue
tbr = int_or_none(self._search_regex(
r'(\d+)k\.mp4', src_url, 'tbr', default=None))
formats.append({
'url': src_url,
'ext': ext,
'tbr': tbr,
'format_id': format_id,
'quality': get_quality(format_id),
})
if not formats:
formats = self._extract_m3u8_formats(
record['file']['url'], video_id, 'mp4', fatal=False)
self._sort_formats(formats)
info['formats'] = formats
return info

View File

@ -9,6 +9,7 @@ from ..utils import (
xpath_text, xpath_text,
int_or_none, int_or_none,
determine_ext, determine_ext,
float_or_none,
parse_duration, parse_duration,
xpath_attr, xpath_attr,
update_url_query, update_url_query,
@ -23,14 +24,17 @@ class TurnerBaseIE(AdobePassIE):
def _extract_timestamp(self, video_data): def _extract_timestamp(self, video_data):
return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts')) return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))
def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data): def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data, custom_tokenizer_query=None):
secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*' secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path) token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path)
if not token: if not token:
query = { query = {
'path': secure_path, 'path': secure_path,
'videoId': content_id,
} }
if custom_tokenizer_query:
query.update(custom_tokenizer_query)
else:
query['videoId'] = content_id
if ap_data.get('auth_required'): if ap_data.get('auth_required'):
query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name']) query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'])
auth = self._download_xml( auth = self._download_xml(
@ -188,3 +192,42 @@ class TurnerBaseIE(AdobePassIE):
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')), 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
'is_live': is_live, 'is_live': is_live,
} }
def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None):
streams_data = self._download_json(
'http://medium.ngtv.io/media/%s/tv' % media_id,
media_id)['media']['tv']
duration = None
chapters = []
formats = []
for supported_type in ('unprotected', 'bulkaes'):
stream_data = streams_data.get(supported_type, {})
m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
if not m3u8_url:
continue
if stream_data.get('playlistProtection') == 'spe':
m3u8_url = self._add_akamai_spe_token(
'http://token.ngtv.io/token/token_spe',
m3u8_url, media_id, ap_data or {}, tokenizer_query)
formats.extend(self._extract_m3u8_formats(
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
duration = float_or_none(stream_data.get('totalRuntime'))
if not chapters:
for chapter in stream_data.get('contentSegments', []):
start_time = float_or_none(chapter.get('start'))
chapter_duration = float_or_none(chapter.get('duration'))
if start_time is None or chapter_duration is None:
continue
chapters.append({
'start_time': start_time,
'end_time': start_time + chapter_duration,
})
self._sort_formats(formats)
return {
'formats': formats,
'chapters': chapters,
'duration': duration,
}