mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-12-22 07:56:49 +00:00
[YouTube] Always extract using MWEB API client
* temporary fix-up for 403 on download * MWEB parameters from yt-dlp 2024-12-06
This commit is contained in:
parent
eeafbbc3e5
commit
d55d1f423d
@ -3,11 +3,13 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
|
import hashlib
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
import os.path
|
import os.path
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
from .common import InfoExtractor, SearchInfoExtractor
|
from .common import InfoExtractor, SearchInfoExtractor
|
||||||
@ -290,6 +292,33 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
||||||
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
|
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
|
||||||
|
|
||||||
|
_SAPISID = None
|
||||||
|
|
||||||
|
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
|
||||||
|
time_now = round(time.time())
|
||||||
|
if self._SAPISID is None:
|
||||||
|
yt_cookies = self._get_cookies('https://www.youtube.com')
|
||||||
|
# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
|
||||||
|
# See: https://github.com/yt-dlp/yt-dlp/issues/393
|
||||||
|
sapisid_cookie = dict_get(
|
||||||
|
yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
|
||||||
|
if sapisid_cookie and sapisid_cookie.value:
|
||||||
|
self._SAPISID = sapisid_cookie.value
|
||||||
|
self.write_debug('Extracted SAPISID cookie')
|
||||||
|
# SAPISID cookie is required if not already present
|
||||||
|
if not yt_cookies.get('SAPISID'):
|
||||||
|
self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
|
||||||
|
self._set_cookie(
|
||||||
|
'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
|
||||||
|
else:
|
||||||
|
self._SAPISID = False
|
||||||
|
if not self._SAPISID:
|
||||||
|
return None
|
||||||
|
# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
|
||||||
|
sapisidhash = hashlib.sha1(
|
||||||
|
'{0} {1} {2}'.format(time_now, self._SAPISID, origin).encode('utf-8')).hexdigest()
|
||||||
|
return 'SAPISIDHASH {0}_{1}'.format(time_now, sapisidhash)
|
||||||
|
|
||||||
def _call_api(self, ep, query, video_id, fatal=True, headers=None):
|
def _call_api(self, ep, query, video_id, fatal=True, headers=None):
|
||||||
data = self._DEFAULT_API_DATA.copy()
|
data = self._DEFAULT_API_DATA.copy()
|
||||||
data.update(query)
|
data.update(query)
|
||||||
@ -1914,9 +1943,50 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
player_response = self._extract_yt_initial_variable(
|
player_response = self._extract_yt_initial_variable(
|
||||||
webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
|
webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
|
||||||
video_id, 'initial player response')
|
video_id, 'initial player response')
|
||||||
if not player_response:
|
if False and not player_response:
|
||||||
player_response = self._call_api(
|
player_response = self._call_api(
|
||||||
'player', {'videoId': video_id}, video_id)
|
'player', {'videoId': video_id}, video_id)
|
||||||
|
if True or not player_response:
|
||||||
|
origin = 'https://www.youtube.com'
|
||||||
|
pb_context = {'html5Preference': 'HTML5_PREF_WANTS'}
|
||||||
|
|
||||||
|
player_url = self._extract_player_url(webpage)
|
||||||
|
ytcfg = self._extract_ytcfg(video_id, webpage)
|
||||||
|
sts = self._extract_signature_timestamp(video_id, player_url, ytcfg)
|
||||||
|
if sts:
|
||||||
|
pb_context['signatureTimestamp'] = sts
|
||||||
|
|
||||||
|
query = {
|
||||||
|
'playbackContext': {
|
||||||
|
'contentPlaybackContext': pb_context,
|
||||||
|
'contentCheckOk': True,
|
||||||
|
'racyCheckOk': True,
|
||||||
|
},
|
||||||
|
'context': {
|
||||||
|
'client': {
|
||||||
|
'clientName': 'MWEB',
|
||||||
|
'clientVersion': '2.20241202.07.00',
|
||||||
|
'hl': 'en',
|
||||||
|
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
|
||||||
|
'timeZone': 'UTC',
|
||||||
|
'utcOffsetMinutes': 0,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'videoId': video_id,
|
||||||
|
}
|
||||||
|
headers = {
|
||||||
|
'X-YouTube-Client-Name': '2',
|
||||||
|
'X-YouTube-Client-Version': '2.20241202.07.00',
|
||||||
|
'Origin': origin,
|
||||||
|
'Sec-Fetch-Mode': 'navigate',
|
||||||
|
'User-Agent': query['context']['client']['userAgent'],
|
||||||
|
}
|
||||||
|
auth = self._generate_sapisidhash_header(origin)
|
||||||
|
if auth is not None:
|
||||||
|
headers['Authorization'] = auth
|
||||||
|
headers['X-Origin'] = origin
|
||||||
|
|
||||||
|
player_response = self._call_api('player', query, video_id, fatal=False, headers=headers)
|
||||||
|
|
||||||
def is_agegated(playability):
|
def is_agegated(playability):
|
||||||
if not isinstance(playability, dict):
|
if not isinstance(playability, dict):
|
||||||
@ -2223,12 +2293,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
playable_formats = [f for f in formats if not f.get('has_drm')]
|
playable_formats = [f for f in formats if not f.get('has_drm')]
|
||||||
if formats and not playable_formats:
|
if formats:
|
||||||
# If there are no formats that definitely don't have DRM, all have DRM
|
if not playable_formats:
|
||||||
self.report_drm(video_id)
|
# If there are no formats that definitely don't have DRM, all have DRM
|
||||||
formats[:] = playable_formats
|
self.report_drm(video_id)
|
||||||
|
formats[:] = playable_formats
|
||||||
if not formats:
|
else:
|
||||||
if streaming_data.get('licenseInfos'):
|
if streaming_data.get('licenseInfos'):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'This video is DRM protected.', expected=True)
|
'This video is DRM protected.', expected=True)
|
||||||
|
Loading…
Reference in New Issue
Block a user