From 21fff051217d1c14a7d50fa752052eadbcafee4e Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 15 Jan 2025 03:19:15 +0000 Subject: [PATCH] [YouTube] Switch to TV API client * thx yt-dlp/yt-dlp#12059 --- youtube_dl/extractor/youtube.py | 102 ++++++++++++++++++++++++++------ 1 file changed, 83 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6171df84a..1424277ac 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -31,7 +31,9 @@ from ..utils import ( dict_get, error_to_compat_str, ExtractorError, + filter_dict, float_or_none, + get_first, extract_attributes, get_element_by_attribute, int_or_none, @@ -82,6 +84,34 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)' + _INNERTUBE_CLIENTS = { + # mweb has 'ultralow' formats + # See: https://github.com/yt-dlp/yt-dlp/pull/557 + 'mweb': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'MWEB', + 'clientVersion': '2.20241202.07.00', + # mweb previously did not require PO Token with this UA + 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 2, + 'REQUIRE_PO_TOKEN': True, + 'SUPPORTS_COOKIES': True, + }, + 'tv': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'TVHTML5', + 'clientVersion': '7.20241201.18.00', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 7, + 'SUPPORTS_COOKIES': True, + }, + } + def _login(self): """ Attempt to log in to YouTube. @@ -321,19 +351,24 @@ class YoutubeBaseInfoExtractor(InfoExtractor): '{0} {1} {2}'.format(time_now, self._SAPISID, origin).encode('utf-8')).hexdigest() return 'SAPISIDHASH {0}_{1}'.format(time_now, sapisidhash) - def _call_api(self, ep, query, video_id, fatal=True, headers=None): + def _call_api(self, ep, query, video_id, fatal=True, headers=None, + note='Downloading API JSON'): data = self._DEFAULT_API_DATA.copy() data.update(query) real_headers = {'content-type': 'application/json'} if headers: real_headers.update(headers) + # was: 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' + api_key = self.get_param('youtube_innertube_key') return self._download_json( 'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id, - note='Downloading API JSON', errnote='Unable to download API page', + note=note, errnote='Unable to download API page', data=json.dumps(data).encode('utf8'), fatal=fatal, - headers=real_headers, - query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'}) + headers=real_headers, query=filter_dict({ + 'key': api_key, + 'prettyPrint': 'false', + })) def _extract_yt_initial_data(self, video_id, webpage): return self._parse_json( @@ -342,6 +377,22 @@ class YoutubeBaseInfoExtractor(InfoExtractor): self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'), video_id) + def _extract_visitor_data(self, *args): + """ + Extract visitorData from an API response or ytcfg + + Appears to be used to track session state + """ + visitor_data = self.get_param('youtube_visitor_data') + if visitor_data: + return visitor_data + + return get_first( + args, (('VISITOR_DATA', + ('INNERTUBE_CONTEXT', 'client', 'visitorData'), + ('responseContext', 'visitorData')), + T(compat_str))) + def _extract_ytcfg(self, video_id, webpage): return self._parse_json( self._search_regex( @@ -1957,6 +2008,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if sts: pb_context['signatureTimestamp'] = sts + client = traverse_obj(self._INNERTUBE_CLIENTS, ( + lambda _, v: not v.get('REQUIRE_PO_TOKEN')), + get_all=False) + query = { 'playbackContext': { 'contentPlaybackContext': pb_context, @@ -1964,30 +2019,39 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'racyCheckOk': True, }, 'context': { - 'client': { - 'clientName': 'MWEB', - 'clientVersion': '2.20241202.07.00', - 'hl': 'en', - 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)', - 'timeZone': 'UTC', - 'utcOffsetMinutes': 0, - }, + 'client': merge_dicts( + traverse_obj(client, ('INNERTUBE_CONTEXT', 'client')), { + 'hl': 'en', + 'timeZone': 'UTC', + 'utcOffsetMinutes': 0, + }), }, 'videoId': video_id, } - headers = { - 'X-YouTube-Client-Name': '2', - 'X-YouTube-Client-Version': '2.20241202.07.00', - 'Origin': origin, + + headers = merge_dicts({ 'Sec-Fetch-Mode': 'navigate', - 'User-Agent': query['context']['client']['userAgent'], - } + 'Origin': origin, + # 'X-Goog-Visitor-Id': self._extract_visitor_data(ytcfg) or '', + }, traverse_obj(client, { + 'X-YouTube-Client-Name': 'INNERTUBE_CONTEXT_CLIENT_NAME', + 'X-YouTube-Client-Version': ( + 'INNERTUBE_CONTEXT', 'client', 'clientVersion'), + 'User-Agent': ( + 'INNERTUBE_CONTEXT', 'client', 'userAgent'), + })) + auth = self._generate_sapisidhash_header(origin) if auth is not None: headers['Authorization'] = auth headers['X-Origin'] = origin - player_response = self._call_api('player', query, video_id, fatal=False, headers=headers) + player_response = self._call_api( + 'player', query, video_id, fatal=False, headers=headers, + note=join_nonempty( + 'Downloading', traverse_obj(query, ( + 'context', 'client', 'clientName')), + 'API JSON', delim=' ')) def is_agegated(playability): if not isinstance(playability, dict):