From 63af7465cc981574e2989a6c2748fdef732997aa Mon Sep 17 00:00:00 2001 From: Jastrab Date: Sat, 15 Jan 2022 21:08:06 +0100 Subject: [PATCH 1/3] [rtvs] Fixed extractor for Slovak television and radio --- youtube_dl/extractor/rtvs.py | 77 ++++++++++++++++++++++++++++++++---- 1 file changed, 69 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/rtvs.py b/youtube_dl/extractor/rtvs.py index 6573b260d..d12a24fc4 100644 --- a/youtube_dl/extractor/rtvs.py +++ b/youtube_dl/extractor/rtvs.py @@ -1,7 +1,13 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor +from ..utils import ( + url_or_none, + determine_ext +) class RTVSIE(InfoExtractor): @@ -26,7 +32,8 @@ class RTVSIE(InfoExtractor): 'id': '63118', 'ext': 'mp4', 'title': 'Amaro Džives - Náš deň', - 'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.' + 'description': + 'Galavečer pri príležitosti Medzinárodného dňa Rómov.' }, 'params': { 'skip_download': True, @@ -36,12 +43,66 @@ class RTVSIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + if url.find('/radio/') != -1: + a2 = url.split('/')[-1] + a1 = url.split('/')[-2] + embed = self._download_webpage( + "https://www.rtvs.sk/embed/radio/archive/%s/%s" % (a1, a2), + video_id) + audio_id = re.search('audio5f.json?id=(?P<id>[^\"]+)', embed) + audio_id = audio_id.group('id') + info = self._download_json( + "https://www.rtvs.sk/json/audio5f.json?id=%s" % audio_id, + audio_id) - playlist_url = self._search_regex( - r'playlist["\']?\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, - 'playlist url', group='url') + formats = [] + formats.append({ + 'url': info['playlist'][0]['sources'][0]['src'], + 'format_id': None, + 'height': 0 + }) + info = info['playlist'][0] + return { + 'id': audio_id, + 'title': info.get('title'), + 'thumbnail': info.get('image'), + 'formats': formats + } + else: + info = self._download_json( + 
"https://www.rtvs.sk/json/archive5f.json?id=%s" % video_id, + video_id) + info = info.get('clip') - data = self._download_json( - playlist_url, video_id, 'Downloading playlist')[0] - return self._parse_jwplayer_data(data, video_id=video_id) + formats = [] + for format_id, format_list in info.items(): + if not isinstance(format_list, list): + format_list = [format_list] + for format_dict in format_list: + if not isinstance(format_dict, dict): + continue + format_url = url_or_none(format_dict.get('src')) + format_type = format_dict.get('type') + ext = determine_ext(format_url) + if (format_type == 'application/x-mpegURL' + or format_id == 'HLS' or ext == 'm3u8'): + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) + elif (format_type == 'application/dash+xml' + or format_id == 'DASH' or ext == 'mpd'): + pass + else: + formats.append({ + 'url': format_url, + }) + formats = sorted(formats, key=lambda i: i['tbr']) + dt = info.get('datetime_create') + return { + 'id': video_id, + 'title': info.get('title') + '-' + dt[:10], + 'thumbnail': info.get('image'), + 'description': info.get('description'), + 'formats': formats + } From 4cf557e765951f9d558d35f44a3202f273d12285 Mon Sep 17 00:00:00 2001 From: Jastrab Date: Sat, 15 Jan 2022 21:27:57 +0100 Subject: [PATCH 2/3] [markiza] Fixed extractor --- youtube_dl/extractor/markiza.py | 111 +++++++++++++++++++++----------- 1 file changed, 75 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/markiza.py b/youtube_dl/extractor/markiza.py index def960a0c..67a9bb07c 100644 --- a/youtube_dl/extractor/markiza.py +++ b/youtube_dl/extractor/markiza.py @@ -1,21 +1,23 @@ # coding: utf-8 from __future__ import unicode_literals +# update 15.01.2022 jastrab import re +import json from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( orderedSet, - parse_duration, - try_get, + url_or_none, + determine_ext ) class 
MarkizaIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)' + _VALID_URL = r'https:\/\/(?:www\.)?videoarchiv\.markiza\.sk\/(?:video\/(?:[^\/]+\/)*|embed\/)epizoda\/(?P<id>\d+)(?:[\_\/\-]|$)' _TESTS = [{ - 'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109', + 'url': 'http://videoarchiv.markiza.sk/video/oteckovia/\ + 84723_oteckovia-109', 'md5': 'ada4e9fad038abeed971843aa028c7b0', 'info_dict': { 'id': '139078', @@ -26,54 +28,89 @@ class MarkizaIE(InfoExtractor): 'duration': 2760, }, }, { - 'url': 'http://videoarchiv.markiza.sk/video/televizne-noviny/televizne-noviny/85430_televizne-noviny', + 'url': ' https://videoarchiv.markiza.sk/video/laska-na-prenajom/epizoda/58779-seria-1-epizoda-14', 'info_dict': { 'id': '85430', 'title': 'Televízne noviny', }, 'playlist_count': 23, }, { - 'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723', + 'url': 'https://videoarchiv.markiza.sk/video/oteckovia/84723', 'only_matching': True, }, { - 'url': 'http://videoarchiv.markiza.sk/video/84723', + 'url': 'https://videoarchiv.markiza.sk/video/84723', 'only_matching': True, }, { - 'url': 'http://videoarchiv.markiza.sk/video/filmy/85190_kamenak', + 'url': 'https://videoarchiv.markiza.sk/video/filmy/85190_kamenak', 'only_matching': True, }, { - 'url': 'http://videoarchiv.markiza.sk/video/reflex/zo-zakulisia/84651_pribeh-alzbetky', + 'url': 'https://videoarchiv.markiza.sk/video/reflex/zo-zakulisia/84651_pribeh-alzbetky', 'only_matching': True, }, { - 'url': 'http://videoarchiv.markiza.sk/embed/85295', + 'url': 'https://videoarchiv.markiza.sk/embed/85295', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + embed = self._search_regex( + r'