From dade9111f11c2eba8216e88e57bace2c948d4163 Mon Sep 17 00:00:00 2001 From: Olivier Trichet Date: Sun, 4 Sep 2022 19:44:05 -0400 Subject: [PATCH] [RadioFrance] Remove old Radio France stations extractors These are not working anymore after their respectives websites were merged into www.radiofrance.fr. --- youtube_dl/extractor/extractors.py | 3 -- youtube_dl/extractor/franceculture.py | 73 --------------------------- youtube_dl/extractor/franceinter.py | 59 ---------------------- youtube_dl/extractor/radiofrance.py | 59 ---------------------- 4 files changed, 194 deletions(-) delete mode 100644 youtube_dl/extractor/franceculture.py delete mode 100644 youtube_dl/extractor/franceinter.py delete mode 100644 youtube_dl/extractor/radiofrance.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 947cbe8fd..e583d42fb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -403,8 +403,6 @@ from .foxnews import ( FoxNewsArticleIE, ) from .foxsports import FoxSportsIE -from .franceculture import FranceCultureIE -from .franceinter import FranceInterIE from .francetv import ( FranceTVIE, FranceTVSiteIE, @@ -995,7 +993,6 @@ from .radiocanada import ( from .radiode import RadioDeIE from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE -from .radiofrance import RadioFranceIE from .rai import ( RaiPlayIE, RaiPlayLiveIE, diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py deleted file mode 100644 index 14f4cb489..000000000 --- a/youtube_dl/extractor/franceculture.py +++ /dev/null @@ -1,73 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - extract_attributes, - int_or_none, -) - - -class FranceCultureIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks', - 'info_dict': { - 'id': 'rendez-vous-au-pays-des-geeks', - 'display_id': 'rendez-vous-au-pays-des-geeks', - 'ext': 'mp3', - 'title': 'Rendez-vous au pays des geeks', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20140301', - 'timestamp': 1393700400, - 'vcodec': 'none', - } - }, { - # no thumbnail - 'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - video_data = extract_attributes(self._search_regex( - r'''(?sx) - (?: - | - ]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*> - ).*? - (]+data-(?:url|asset-source)="[^"]+"[^>]+>) - ''', - webpage, 'video data')) - - video_url = video_data.get('data-url') or video_data['data-asset-source'] - title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage) - - description = self._html_search_regex( - r'(?s)]+class="intro"[^>]*>.*?

(.+?)

', - webpage, 'description', default=None) - thumbnail = self._search_regex( - r'(?s)]+itemtype="https://schema.org/ImageObject"[^>]*>.*?]+(?:data-dejavu-)?src="([^"]+)"', - webpage, 'thumbnail', default=None) - uploader = self._html_search_regex( - r'(?s)(.*?)', - webpage, 'uploader', default=None) - ext = determine_ext(video_url.lower()) - - return { - 'id': display_id, - 'display_id': display_id, - 'url': video_url, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'ext': ext, - 'vcodec': 'none' if ext == 'mp3' else None, - 'uploader': uploader, - 'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')), - 'duration': int_or_none(video_data.get('data-duration')), - } diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py deleted file mode 100644 index ae822a50e..000000000 --- a/youtube_dl/extractor/franceinter.py +++ /dev/null @@ -1,59 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import month_by_name - - -class FranceInterIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P[^?#]+)' - - _TEST = { - 'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016', - 'md5': '9e54d7bdb6fdc02a841007f8a975c094', - 'info_dict': { - 'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016', - 'ext': 'mp3', - 'title': 'Affaire Cahuzac : le contentieux du compte en Suisse', - 'description': 'md5:401969c5d318c061f86bda1fa359292b', - 'thumbnail': r're:^https?://.*\.jpg', - 'upload_date': '20160907', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - video_url = self._search_regex( - r'(?s)]+class=["\']page-diffusion["\'][^>]*>.*?]+data-url=(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'video url', group='url') - - title = self._og_search_title(webpage) - description = self._og_search_description(webpage) - thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage) - - upload_date_str = self._search_regex( - r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<', - webpage, 'upload date', fatal=False) - if upload_date_str: - upload_date_list = upload_date_str.split() - upload_date_list.reverse() - upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0) - upload_date_list[2] = '%02d' % int(upload_date_list[2]) - upload_date = ''.join(upload_date_list) - else: - upload_date = None - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'formats': [{ - 'url': video_url, - 'vcodec': 'none', - }], - } diff --git a/youtube_dl/extractor/radiofrance.py b/youtube_dl/extractor/radiofrance.py deleted file mode 100644 index a8afc0014..000000000 --- a/youtube_dl/extractor/radiofrance.py +++ /dev/null @@ -1,59 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class RadioFranceIE(InfoExtractor): - _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P[^?#]+)' - IE_NAME = 'radiofrance' - - _TEST = { - 'url': 'http://maison.radiofrance.fr/radiovisions/one-one', - 'md5': 'bdbb28ace95ed0e04faab32ba3160daf', - 'info_dict': { - 'id': 'one-one', - 'ext': 'ogg', - 'title': 'One to one', - 'description': "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.", - 'uploader': 'Thomas Hercouët', - }, - } - - def _real_extract(self, url): - m = re.match(self._VALID_URL, url) - video_id = m.group('id') - - webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'

(.*?)

', webpage, 'title') - description = self._html_search_regex( - r'
(.*?)
', - webpage, 'description', fatal=False) - uploader = self._html_search_regex( - r'
  © (.*?)
', - webpage, 'uploader', fatal=False) - - formats_str = self._html_search_regex( - r'class="jp-jplayer[^"]*" data-source="([^"]+)">', - webpage, 'audio URLs') - formats = [ - { - 'format_id': fm[0], - 'url': fm[1], - 'vcodec': 'none', - 'preference': i, - } - for i, fm in - enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str)) - ] - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'description': description, - 'uploader': uploader, - }