From 8b11dbba0051da0c5f22576a2593b15abe6a2deb Mon Sep 17 00:00:00 2001 From: Morgan Harris Date: Tue, 21 Mar 2023 17:25:12 +1100 Subject: [PATCH] Add support for downloading an entire season as a playlist --- youtube_dl/extractor/tenplay.py | 126 +++++++++++++++++++++++++------- 1 file changed, 100 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/tenplay.py b/youtube_dl/extractor/tenplay.py index 14a0d4b91..59937400e 100644 --- a/youtube_dl/extractor/tenplay.py +++ b/youtube_dl/extractor/tenplay.py @@ -3,10 +3,10 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( - HEADRequest, parse_age_limit, unescapeHTML, str_to_int, + urljoin ) from datetime import datetime @@ -15,7 +15,11 @@ import json class TenPlayIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})' + _VALID_URL = r'''(?x)^ + https?://(?:www\.)?10play\.com\.au/(?: + (?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})| (?# Individual show id) + (?P<show>[^/]+)/episodes/(?P<season>[^/]+)(?# Entire season playlist) + )''' _TESTS = [{ 'url': 'https://10play.com.au/masterchef/episodes/season-1/episode-1/tpv220408msjpb', 'info_dict': { @@ -28,44 +32,118 @@ class TenPlayIE(InfoExtractor): 'upload_date': '20090427', }, 'params': { - # 'format': 'bestvideo', 'skip_download': True, 'usenetrc': True, } }, { 'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc', 'only_matching': True, + }, { + 'info_dict': { + 'title': 'Season 2022' + }, + 'url': 'https://10play.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2022', + 'playlist_count': 256, + 'params': { + 'skip_download': True, + 'usenetrc': True, + } }] _NETRC_MACHINE = '10play.com.au' _GEO_BYPASS = False + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._access_token = None + def get_access_token(self, content_id): - # log in with username and password - username, password = 
self._get_login_info() + if self._access_token is None: + # log in with username and password + username, password = self._get_login_info() - if username is None or password is None: - self.raise_login_required() + if username is None or password is None: + self.raise_login_required() - ten_auth_header = base64.b64encode(datetime.utcnow().strftime("%Y%m%d%H%M%S").encode()) + ten_auth_header = base64.b64encode(datetime.utcnow().strftime("%Y%m%d%H%M%S").encode()) - auth_request_data = json.dumps({"email": username, "password": password}).encode() - token_data = self._download_json( - 'https://10play.com.au/api/user/auth', content_id, - note='Logging in to 10play', - data=auth_request_data, - headers={ - 'Content-Type': 'application/json;charset=utf-8', - 'X-Network-Ten-Auth': ten_auth_header - }) + auth_request_data = json.dumps({"email": username, "password": password}).encode() + token_data = self._download_json( + 'https://10play.com.au/api/user/auth', content_id, + note='Logging in to 10play', + data=auth_request_data, + headers={ + 'Content-Type': 'application/json;charset=utf-8', + 'X-Network-Ten-Auth': ten_auth_header + }) - access_token = token_data['jwt']['accessToken'] + self._access_token = token_data['jwt']['accessToken'] - return {'Authorization': f"Bearer {access_token}"} + return self._access_token + + def extract_playlist(self, url): + matches = self._VALID_URL_RE.match(url) + show = matches.group('show') + season = matches.group('season') + + # The api/v1 endpoint is throwing up 403 Forbidden, so we need to use the old API + season_info = self._download_json( + f'https://10play.com.au/api/shows/{show}/episodes/{season}', f"{show}/{season}", + note='Fetching playlist info') + + # Try to find a carousel with the title "episodes", otherwise default to the top one + episodes_carousel = next((c for c in season_info['content'][0]['components'] if c['title'].lower() == 'episodes'), + season_info['content'][0]['components'][0]) + + episodes = 
episodes_carousel['slides'] + + load_more_url = urljoin(url, episodes_carousel['loadMoreUrl']) + + while episodes_carousel['hasMore']: + skip_ids = [ep['id'] for ep in episodes] + + episodes_carousel = self._download_json( + load_more_url, f"{show}/{season}", + note=f'Fetching episodes {len(skip_ids)}+', + query={'skipIds[]': skip_ids}) + + episodes += episodes_carousel['items'] + + episodes_urls = [urljoin(url, ep['cardLink']) for ep in episodes] + + return self.playlist_from_matches(episodes_urls, playlist_title=season_info['content'][0].get('title')) + + # Altered version to check for geoblocking without extraneous HEAD request + def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, + entry_protocol='m3u8', preference=None, + m3u8_id=None, note=None, errnote=None, + fatal=True, live=False, data=None, headers={}, + query={}): + res = self._download_webpage_handle( + m3u8_url, video_id, + note=note or 'Downloading m3u8 information', + errnote=errnote or 'Failed to download m3u8 information', + fatal=fatal, data=data, headers=headers, query=query) + + if res is False: + return [] + + m3u8_doc, urlh = res + m3u8_url = urlh.geturl() + + if '10play-not-in-oz' in m3u8_url: + self.raise_geo_restricted(countries=['AU']) + + return self._parse_m3u8_formats( + m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol, + preference=preference, m3u8_id=m3u8_id, live=live) def _real_extract(self, url): content_id = self._match_id(url) + if content_id is None or content_id == 'None': + return self.extract_playlist(url) + video_info = self._download_json( 'https://10play.com.au/api/v1/videos/' + content_id, content_id, note='Fetching video info') @@ -75,19 +153,15 @@ class TenPlayIE(InfoExtractor): # Handle member-gated videos if video_info.get('memberGated'): - extra_headers = self.get_access_token(content_id) - headers.update(**extra_headers) + access_token = self.get_access_token(content_id) + headers.update(Authorization=f"Bearer {access_token}") playback_info = 
self._download_json( playback_url, content_id, note='Fetching playback info', headers=headers) - m3u8_url = self._request_webpage(HEADRequest(playback_info['source']), content_id).geturl() - if '10play-not-in-oz' in m3u8_url: - self.raise_geo_restricted(countries=['AU']) - - formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4') + formats = self._extract_m3u8_formats(playback_info['source'], content_id, 'mp4') self._sort_formats(formats) return {