# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( unified_timestamp, url_or_none, ) class UNOIE(InfoExtractor): _VALID_URL = r'https?://media\.un\.org/(?:\w+/)+(?Pk\d[\w]+)' _TESTS = [{ 'url': 'https://media.un.org/en/asset/k1r/k1r3vy9ikk', # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb', 'info_dict': { 'id': '1_r3vy9ikk', 'ext': 'mp4', 'title': 'md5:abde2a46d396051535e5e6fd6f627a19', 'description': 'md5:2cba11ee153ae3e6ae2c629e7c4e39b0', 'thumbnail': 're:https?://.+/thumbnail/.+', 'duration': 5768, 'timestamp': 1625216872, 'upload_date': '20210702', 'uploader_id': 'UNWebTV_New_York', }, }] def _real_extract(self, url): video_id = self._match_id(url) video_id = video_id[1:2] + '_' + video_id[2:] webpage = self._download_webpage(url, video_id) title = ( self._html_search_meta(('title', 'og:title'), webpage) or self._html_search_regex(r']*>([^<]+)