mirror of https://github.com/ytdl-org/youtube-dl.git synced 2025-07-20 17:26:19 +00:00

Merge d7b502a7278097f68592dc5f6423141be7c69efb into 4d05f84325070c3f6fe2ed6096138757675469a4

dirkf 2024-06-27 06:36:55 +08:00 committed by GitHub
commit 0e08823ff3
4 changed files with 124 additions and 64 deletions

youtube_dl/compat.py

@@ -2448,12 +2448,12 @@ try:
 except ImportError:
     import BaseHTTPServer as compat_http_server

+# urllib.parse
 try:
     from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
     from urllib.parse import unquote as compat_urllib_parse_unquote
     from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
     from urllib.parse import urlencode as compat_urllib_parse_urlencode
-    from urllib.parse import parse_qs as compat_parse_qs
 except ImportError:  # Python 2
     _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
                 else re.compile(r'([\x00-\x7f]+)'))
@@ -2543,60 +2543,80 @@ except ImportError:  # Python 2
         return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq)

-    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
-    # Python 2's version is apparently totally broken
-    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
-                   encoding='utf-8', errors='replace'):
-        qs, _coerce_result = qs, compat_str
-        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
-        r = []
-        for name_value in pairs:
-            if not name_value and not strict_parsing:
-                continue
-            nv = name_value.split('=', 1)
-            if len(nv) != 2:
-                if strict_parsing:
-                    raise ValueError('bad query field: %r' % (name_value,))
-                # Handle case of a control-name with no equal sign
-                if keep_blank_values:
-                    nv.append('')
-                else:
-                    continue
-            if len(nv[1]) or keep_blank_values:
-                name = nv[0].replace('+', ' ')
-                name = compat_urllib_parse_unquote(
-                    name, encoding=encoding, errors=errors)
-                name = _coerce_result(name)
-                value = nv[1].replace('+', ' ')
-                value = compat_urllib_parse_unquote(
-                    value, encoding=encoding, errors=errors)
-                value = _coerce_result(value)
-                r.append((name, value))
-        return r
-
-    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
-                        encoding='utf-8', errors='replace'):
-        parsed_result = {}
-        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
-                           encoding=encoding, errors=errors)
-        for name, value in pairs:
-            if name in parsed_result:
-                parsed_result[name].append(value)
-            else:
-                parsed_result[name] = [value]
-        return parsed_result
-
     setattr(compat_urllib_parse, '_urlencode',
             getattr(compat_urllib_parse, 'urlencode'))
     for name, fix in (
             ('unquote_to_bytes', compat_urllib_parse_unquote_to_bytes),
             ('parse_unquote', compat_urllib_parse_unquote),
             ('unquote_plus', compat_urllib_parse_unquote_plus),
-            ('urlencode', compat_urllib_parse_urlencode),
-            ('parse_qs', compat_parse_qs)):
+            ('urlencode', compat_urllib_parse_urlencode)):
         setattr(compat_urllib_parse, name, fix)
-
-compat_urllib_parse_parse_qs = compat_parse_qs
+finally:
+    try:
+        # arguments changed in 3.8 and 3.10
+        from urllib.parse import parse_qs as _parse_qs
+        _parse_qs('a=b', separator='&')
+        compat_parse_qs = _parse_qs
+    except (ImportError, TypeError):  # Python 2, < 3.10
+
+        # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
+        # Python 2's version is apparently totally broken
+        # Also use this implementation for Py < 3.10
+        # * support only default separator '&', not r'[&;]', like 3.10+
+        # * support max_num_fields, like 3.8+
+        def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
+                       encoding='utf-8', errors='replace',
+                       max_num_fields=None, separator='&'):
+            if not isinstance(separator, (compat_str, str)):
+                raise ValueError('Separator must be of type string or bytes')
+            # DoS protection, if anyone cares
+            if qs and max_num_fields is not None and qs.count(separator) >= max_num_fields:
+                raise ValueError('Too many fields')
+            _coerce_result = compat_str
+            r = []
+            for name_value in qs.split(separator):
+                if not name_value and not strict_parsing:
+                    continue
+                nv = name_value.split('=', 1)
+                if len(nv) != 2:
+                    if strict_parsing:
+                        raise ValueError('bad query field: %r' % (name_value,))
+                    # Handle case of a control-name with no equal sign
+                    if keep_blank_values:
+                        nv.append('')
+                    else:
+                        continue
+                if len(nv[1]) or keep_blank_values:
+                    name = nv[0].replace('+', ' ')
+                    name = compat_urllib_parse_unquote(
+                        name, encoding=encoding, errors=errors)
+                    name = _coerce_result(name)
+                    value = nv[1].replace('+', ' ')
+                    value = compat_urllib_parse_unquote(
+                        value, encoding=encoding, errors=errors)
+                    value = _coerce_result(value)
+                    r.append((name, value))
+            return r
+
+        def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
+                            encoding='utf-8', errors='replace',
+                            max_num_fields=None, separator='&'):
+            parsed_result = {}
+            pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
+                               encoding, errors, max_num_fields, separator)
+            for name, value in pairs:
+                if name in parsed_result:
+                    parsed_result[name].append(value)
+                else:
+                    parsed_result[name] = [value]
+            return parsed_result
+
+        for name, fix in (
+                ('parse_qs', compat_parse_qs),
+                ('parse_qsl', _parse_qsl)):
+            setattr(compat_urllib_parse, name, fix)
+
+compat_urllib_parse_parse_qs = compat_parse_qs

 try:
     from urllib.request import DataHandler as compat_urllib_request_DataHandler
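
Note on the detection logic above: urllib.parse.parse_qs gained max_num_fields in Python 3.8 and separator in Python 3.10, so the new finally: block probes the installed stdlib once at import time and only keeps the bundled backport when the probe fails. A minimal standalone sketch of the same probe pattern (the name parse_qs_compat is invented for illustration, and the fallback is deliberately simplified):

# Probe whether the stdlib parse_qs already accepts the 3.10+ signature;
# otherwise fall back to an implementation taking the same argument.
try:
    from urllib.parse import parse_qs as _parse_qs
    _parse_qs('a=b', separator='&')  # raises TypeError before Python 3.10
    parse_qs_compat = _parse_qs
except (ImportError, TypeError):  # Python 2, or Python 3 < 3.10
    def parse_qs_compat(qs, separator='&'):
        # Simplified stand-in: split only on `separator`, like 3.10+ does;
        # no keep_blank_values/strict_parsing/max_num_fields/unquoting here.
        result = {}
        for name_value in qs.split(separator):
            if not name_value:
                continue
            name, _, value = name_value.partition('=')
            result.setdefault(name, []).append(value)
        return result

print(parse_qs_compat('a=1&a=2&b=3'))  # {'a': ['1', '2'], 'b': ['3']}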

youtube_dl/extractor/bfi.py

@@ -4,7 +4,12 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import extract_attributes
+from ..utils import (
+    extract_attributes,
+    parse_qs,
+    remove_start,
+    smuggle_url,
+)


 class BFIPlayerIE(InfoExtractor):
@@ -12,26 +17,39 @@ class BFIPlayerIE(InfoExtractor):
     _VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P<id>[\w-]+)-online'
     _TEST = {
         'url': 'https://player.bfi.org.uk/free/film/watch-computer-doctor-1974-online',
-        'md5': 'e8783ebd8e061ec4bc6e9501ed547de8',
+        'md5': '15598bdd6a413ce9363970754f054d76',
         'info_dict': {
             'id': 'htNnhlZjE60C9VySkQEIBtU-cNV1Xx63',
             'ext': 'mp4',
             'title': 'Computer Doctor',
             'description': 'md5:fb6c240d40c4dbe40428bdd62f78203b',
+            'timestamp': 1564424975,
+            'upload_date': '20190729',
+            'uploader_id': '6057949427001',
         },
-        'skip': 'BFI Player films cannot be played outside of the UK',
+        # 'skip': 'BFI Player films cannot be played outside of the UK',
     }
+    _BRIGHTCOVE_ACCOUNT_ID = '6057949427001'

     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        entries = []
-        for player_el in re.findall(r'(?s)<[^>]+class="player"[^>]*>', webpage):
-            player_attr = extract_attributes(player_el)
-            ooyala_id = player_attr.get('data-video-id')
-            if not ooyala_id:
-                continue
-            entries.append(self.url_result(
-                'ooyala:' + ooyala_id, 'Ooyala',
-                ooyala_id, player_attr.get('data-label')))
-        return self.playlist_result(entries)
+
+        film_only = 'play-film' in parse_qs(url, keep_blank_values=True)
+
+        def entries():
+            for player_el in re.finditer(r'(?s)<video-js\b[^>]+>', webpage):
+                player_attr = extract_attributes(player_el.group(0))
+                bcv_id, account_id, player_id, embed = (
+                    player_attr.get(x) for x in ('data-ref-id', 'data-acid', 'data-pid', 'data-embed'))
+                if not bcv_id:
+                    continue
+                if film_only and player_attr.get('data-video-type') != 'film':
+                    continue
+                bc_url = 'brightcove:new:%s:%s:%s:video:ref:%s' % (
+                    account_id or self._BRIGHTCOVE_ACCOUNT_ID, player_id or 'default', embed or 'default', bcv_id)
+                yield self.url_result(smuggle_url(
+                    bc_url, {'referrer': url, 'force_videoid': remove_start(bcv_id, 'ref:')}), ie='BrightcoveNew', video_id=video_id)
+
+        return self.playlist_result(entries())
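
The rewrite above drops the defunct Ooyala embeds: it scrapes the <video-js> tag attributes and defers to brightcove:new via the compact internal URL form added in the next file. A rough standalone sketch of that hand-off, with an invented page fragment and a plain regex-built dict standing in for extract_attributes:

import re

# Invented fragment; the attribute names match those the extractor reads.
webpage = ('<video-js data-ref-id="winstanley" data-acid="6057949427001" '
           'data-pid="default" data-embed="default" data-video-type="film">')

for m in re.finditer(r'(?s)<video-js\b[^>]+>', webpage):
    attrs = dict(re.findall(r'([\w-]+)="([^"]*)"', m.group(0)))
    bcv_id = attrs.get('data-ref-id')
    if not bcv_id:
        continue
    bc_url = 'brightcove:new:%s:%s:%s:video:ref:%s' % (
        attrs.get('data-acid') or '6057949427001',
        attrs.get('data-pid') or 'default',
        attrs.get('data-embed') or 'default',
        bcv_id)
    print(bc_url)
# brightcove:new:6057949427001:default:default:video:ref:winstanley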

youtube_dl/extractor/brightcove.py

@@ -340,7 +340,7 @@ class BrightcoveLegacyIE(InfoExtractor):

 class BrightcoveNewIE(AdobePassIE):
     IE_NAME = 'brightcove:new'
-    _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*(?P<content_type>video|playlist)Id=(?P<video_id>\d+|ref:[^&]+)'
+    _VALID_URL = r'(?:brightcove:new|(?P<u>https?)):(?(u)//players\.brightcove\.net/)(?P<account_id>\d+)(?(u)/|:)(?P<player_id>[^/]+)(?(u)_|:)(?P<embed>[^/]+)(?(u)/index\.html\?.*|:)(?P<content_type>video|playlist)(?(u)Id=|:)(?P<video_id>\d+|ref:[^&]+)'
     _TESTS = [{
         'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
         'md5': 'c8100925723840d4b0d243f7025703be',
@@ -593,7 +593,7 @@ class BrightcoveNewIE(AdobePassIE):
             'ip_blocks': smuggled_data.get('geo_ip_blocks'),
         })

-        account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()
+        account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()[1:]

         policy_key_id = '%s_%s' % (account_id, player_id)
         policy_key = self._downloader.cache.load('brightcove', policy_key_id)
@@ -678,4 +678,4 @@ class BrightcoveNewIE(AdobePassIE):
             json_data.get('description'))

         return self._parse_brightcove_metadata(
-            json_data, video_id, headers=headers)
+            json_data, smuggled_data.get('force_videoid') or video_id, headers=headers)
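
The reworked _VALID_URL relies on a regex conditional: group u only participates when a real URL is matched, and each (?(u)X|Y) then selects the web-URL or internal separator accordingly, which is why _real_extract now takes groups()[1:] to skip that marker group. A quick standalone check that both accepted shapes parse to the same fields (pattern copied verbatim from the diff):

import re

_VALID_URL = r'(?:brightcove:new|(?P<u>https?)):(?(u)//players\.brightcove\.net/)(?P<account_id>\d+)(?(u)/|:)(?P<player_id>[^/]+)(?(u)_|:)(?P<embed>[^/]+)(?(u)/index\.html\?.*|:)(?P<content_type>video|playlist)(?(u)Id=|:)(?P<video_id>\d+|ref:[^&]+)'

for url in (
        'http://players.brightcove.net/929656772001/default_default/index.html?videoId=4463358922001',
        'brightcove:new:929656772001:default:default:video:4463358922001'):
    # groups()[0] is the 'u' marker ('http'/'https' or None); payload starts at 1
    print(re.match(_VALID_URL, url).groups()[1:])
# both lines print:
# ('929656772001', 'default', 'default', 'video', '4463358922001')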

youtube_dl/utils.py

@@ -2402,7 +2402,7 @@ class YoutubeDLError(Exception):
 class ExtractorError(YoutubeDLError):
     """Error during info extraction."""

-    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
+    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
         """ tb, if given, is the original traceback (so that it can be printed out).
         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
         """
@@ -2421,6 +2421,7 @@ class ExtractorError(YoutubeDLError):
         self.exc_info = sys.exc_info()  # preserve original exception
         self.cause = cause
         self.video_id = video_id
+        self.ie = ie

     def format_traceback(self):
         if self.traceback is None:
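
The new ie slot simply records which extractor raised the error, alongside video_id; nothing else in this diff consumes it yet. A hypothetical use (the video id and IE name below are invented, and the patched utils module is assumed importable):

from youtube_dl.utils import ExtractorError

try:
    raise ExtractorError('This video is DRM protected', expected=True,
                         video_id='abc123', ie='Example')  # invented values
except ExtractorError as e:
    print(e.video_id, e.ie)  # abc123 Example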
@@ -6561,3 +6562,24 @@ def join_nonempty(*values, **kwargs):
     if from_dict is not None:
         values = (traverse_obj(from_dict, variadic(v)) for v in values)
     return delim.join(map(compat_str, filter(None, values)))
+
+
+class classproperty(object):
+    """property access for class methods with optional caching"""
+    def __new__(cls, *args, **kwargs):
+        func = args[0] if len(args) > 0 else kwargs.get('func')
+        if not func:
+            return functools.partial(cls, *args, **kwargs)
+        return super(classproperty, cls).__new__(cls)
+
+    def __init__(self, func, cache=False):
+        functools.update_wrapper(self, func)
+        self.func = func
+        self._cache = {} if cache else None
+
+    def __get__(self, _, cls):
+        if self._cache is None:
+            return self.func(cls)
+        elif cls not in self._cache:
+            self._cache[cls] = self.func(cls)
+        return self._cache[cls]
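
For reference, the new classproperty behaves like a read-only property looked up on the class itself, and cache=True memoises the result per class; it presumes functools is already imported in utils.py (for update_wrapper and partial). A small usage sketch with invented classes, assuming classproperty is in scope:

class Uncached(object):
    @classproperty
    def ua(cls):
        # recomputed on every attribute access
        return 'Mozilla/5.0 (%s)' % cls.__name__

class Cached(object):
    @classproperty(cache=True)
    def ua(cls):
        print('computing once...')
        return 'Mozilla/5.0 (%s)' % cls.__name__

print(Uncached.ua)  # Mozilla/5.0 (Uncached)
print(Cached.ua)    # computing once... then Mozilla/5.0 (Cached)
print(Cached.ua)    # Mozilla/5.0 (Cached) -- cached, function not re-run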