From 97516d5ed3476dfaa358c441fa904f30901d2697 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 7 Oct 2024 12:53:03 +0100 Subject: [PATCH 01/11] [ORFRadio] Support /programm/ URL format * fixes yt-dlp/yt-dlp#11014 --- youtube_dl/extractor/orf.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 1ee78edbc..2e1341f44 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -112,7 +112,7 @@ class ORFRadioIE(ORFRadioBase): _VALID_URL = ( r'https?://sound\.orf\.at/radio/(?P{0})/sendung/(?P\d+)(?:/(?P\w+))?'.format(_STATION_RE), - r'https?://(?P{0})\.orf\.at/player/(?P\d{{8}})/(?P\d+)'.format(_STATION_RE), + r'https?://(?P{0})\.orf\.at/(?:player|programm)/(?P\d{{8}})/(?P\d+)'.format(_STATION_RE), ) _TESTS = [{ @@ -150,6 +150,10 @@ class ORFRadioIE(ORFRadioBase): 'duration': 1500, }, 'skip': 'Shows from ORF Sound are only available for 30 days.' + }, { + # yt-dlp/yt-dlp#11014 + 'url': 'https://oe1.orf.at/programm/20240916/769302/Playgrounds', + 'only_matching': True, }] def _real_extract(self, url): From 8388ee5f8d5a1a9728546d27a2ca8f13cc133778 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 7 Oct 2024 13:14:16 +0100 Subject: [PATCH 02/11] [HentaiStigma] Support new frame format with HTML5 video * resolves #25019 --- youtube_dl/extractor/hentaistigma.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/hentaistigma.py b/youtube_dl/extractor/hentaistigma.py index 86a93de4d..c01fe05fd 100644 --- a/youtube_dl/extractor/hentaistigma.py +++ b/youtube_dl/extractor/hentaistigma.py @@ -1,6 +1,11 @@ +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ( + merge_dicts, + traverse_obj, +) class HentaiStigmaIE(InfoExtractor): @@ -24,16 +29,17 @@ class HentaiStigmaIE(InfoExtractor): title = self._html_search_regex( r']+class="posttitle"[^>]*>]*>([^<]+)', webpage, 
'title') - wrap_url = self._html_search_regex( + + wrap_url = self._search_regex( r']+src="([^"]+mp4)"', webpage, 'wrapper url') - wrap_webpage = self._download_webpage(wrap_url, video_id) - video_url = self._html_search_regex( - r'file\s*:\s*"([^"]+)"', wrap_webpage, 'video url') + vid_page = self._download_webpage(wrap_url, video_id) - return { + entries = self._parse_html5_media_entries(wrap_url, vid_page, video_id) + self._sort_formats(traverse_obj(entries, (0, 'formats')) or []) + + return merge_dicts({ 'id': video_id, - 'url': video_url, 'title': title, 'age_limit': 18, - } + }, entries[0]) From fa7fdb263be6f5f41ad5e3cf54b949f3a918bd02 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 7 Oct 2024 13:28:17 +0100 Subject: [PATCH 03/11] [Mgoon,Kaltura] Fix regex typo `(:?` * thx yt-dlp/yt-dlp#10807 (584d455) --- youtube_dl/extractor/kaltura.py | 2 +- youtube_dl/extractor/mgoon.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 6d4d93394..861b6952b 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -23,7 +23,7 @@ class KalturaIE(InfoExtractor): (?: kaltura:(?P\d+):(?P[0-9a-z_]+)| https?:// - (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/ + (?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/ (?: (?: # flash player diff --git a/youtube_dl/extractor/mgoon.py b/youtube_dl/extractor/mgoon.py index 7bb473900..56086f7b9 100644 --- a/youtube_dl/extractor/mgoon.py +++ b/youtube_dl/extractor/mgoon.py @@ -13,7 +13,7 @@ from ..utils import ( class MgoonIE(InfoExtractor): _VALID_URL = r'''(?x)https?://(?:www\.)? 
- (?:(:?m\.)?mgoon\.com/(?:ch/(?:.+)/v|play/view)| + (?:(?:m\.)?mgoon\.com/(?:ch/(?:.+)/v|play/view)| video\.mgoon\.com)/(?P[0-9]+)''' _API_URL = 'http://mpos.mgoon.com/player/video?id={0:}' _TESTS = [ From 10f38086d4562e5b5c04dbe55f1f403b732732d3 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 7 Oct 2024 14:54:20 +0100 Subject: [PATCH 04/11] [core] Fix jwplayer format parsing * thx yt-dlp/yt-dlp#10956 --- youtube_dl/extractor/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9b0016d07..c54406e7a 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -3128,7 +3128,8 @@ class InfoExtractor(object): continue urls.add(source_url) source_type = source.get('type') or '' - ext = mimetype2ext(source_type) or determine_ext(source_url) + # https://github.com/yt-dlp/yt-dlp/pull/10956 + ext = determine_ext(source_url, default_ext=mimetype2ext(source_type)) if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url: formats.extend(self._extract_m3u8_formats( source_url, video_id, 'mp4', entry_protocol='m3u8_native', From 38b3a0980cdf7580f5db94a21da8eb246beecac6 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sat, 19 Oct 2024 17:01:24 +0100 Subject: [PATCH 05/11] [YandexMusic] Fix CAPTCHA check * correct logic in _download_webpage() hook (yt-dlp/yt-dlp#4432) * improve error message. --- youtube_dl/extractor/yandexmusic.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 8da5b430f..91b731673 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -30,17 +30,20 @@ class YandexMusicBaseIE(InfoExtractor): @staticmethod def _raise_captcha(): raise ExtractorError( - 'YandexMusic has considered youtube-dl requests automated and ' - 'asks you to solve a CAPTCHA. 
You can either wait for some ' - 'time until unblocked and optionally use --sleep-interval ' - 'in future or alternatively you can go to https://music.yandex.ru/ ' - 'solve CAPTCHA, then export cookies and pass cookie file to ' - 'youtube-dl with --cookies', + 'YandexMusic has considered youtube-dl requests automated ' + 'and asks you to solve a CAPTCHA. You can wait for some time ' + 'until unblocked and optionally use --sleep-interval in future; ' + 'otherwise solve the CAPTCHA at https://music.yandex.ru/, ' + 'then export cookies and pass the cookie file to youtube-dl ' + 'with --cookies.', expected=True) def _download_webpage_handle(self, *args, **kwargs): webpage = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs) - if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage: + blocked_ip_msg = ( + 'Нам очень жаль, но запросы, поступившие с ' + 'вашего IP-адреса, похожи на автоматические.') + if blocked_ip_msg in (webpage or [''])[0]: self._raise_captcha() return webpage From 0ddcc15fd175a2a5962bc3f51dcaf89fbc18a9ab Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 20 Oct 2024 11:53:06 +0100 Subject: [PATCH 06/11] [XFileShare] Add geo-block detection --- youtube_dl/extractor/xfileshare.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 4dc3032e7..62ce75970 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -13,6 +13,7 @@ from ..utils import ( decode_packed_codes, determine_ext, ExtractorError, + get_element_by_class, get_element_by_id, int_or_none, merge_dicts, @@ -200,11 +201,20 @@ class XFileShareIE(InfoExtractor): host, 'embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id) webpage = self._download_webpage(url, video_id) - container_div = get_element_by_id('container', webpage) or webpage + main = self._search_regex( + r'(?s)<main>(.+)</main>', webpage, 'main', default=webpage) + container_div = ( + get_element_by_id('container', main) + or get_element_by_class('container', main) + or webpage) if self._search_regex( r'>This server is in maintenance mode\.', container_div, 'maint error', group=0, default=None): raise ExtractorError(clean_html(container_div), expected=True) + if self._search_regex( + 'not available in your country', container_div, + 'geo block', group=0, default=None): + self.raise_geo_restricted() if self._search_regex( self._FILE_NOT_FOUND_REGEXES, container_div, 'missing video error', group=0, default=None): From b80634ecf76a2cb1bdda51a830e4f94b05013eb0 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 20 Oct 2024 12:28:08 +0100 Subject: [PATCH 07/11] [XFileShare] Re-factor and fix tests * update site list * support page with player data in