1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2025-01-25 20:29:51 +00:00

update extractor

This commit is contained in:
sgerwk 2020-03-22 18:37:24 +01:00
parent 7224c2af4a
commit a74eacc461

View File

@ -17,70 +17,72 @@ class ParamountIE(InfoExtractor):
'id': 'dbf6d5d5-1a95-41ac-b17b-b5caca227b25', 'id': 'dbf6d5d5-1a95-41ac-b17b-b5caca227b25',
'ext': 'mp4', 'ext': 'mp4',
'title': 'SPECIALE STEPHEN KING - Speciali video, Paramount Network', 'title': 'SPECIALE STEPHEN KING - Speciali video, Paramount Network',
'description': 'Tutti gli speciali di Paramount Network curiosità, approfondimenti e aggiornamenti su film, serie tv e personaggi del cinema.' 'description': 'Tutti gli speciali di Paramount Network curiosità, approfondimenti e aggiornamenti su film, serie tv e personaggi del cinema.'
} }
} }
def _obtain_akamaihd_formats(self, url):
    """Download the XML page at url and collect one format per rendition.

    Every ./video/item/rendition element that carries a non-empty <src>
    child becomes a dict with 'width', 'height' and 'url' keys; width and
    height are passed through int_or_none.  Renditions without a usable
    <src> are skipped.  Returns the (possibly empty) list of dicts.
    """
    if self._downloader.params.get('verbose', False):
        # Debug aid only: fetch the raw text once more so it can be shown.
        # The parsed copy downloaded below is the one actually used.
        listpage = self._download_webpage(url, 'akamaihd format list')
        self.to_screen('formats page = %s' % (listpage))
    listpage = self._download_xml(url, 'akamaihd format list')
    formats = []
    for rendition in listpage.findall('./video/item/rendition'):
        src = rendition.find('./src')
        # find() returns None when there is no <src> child, and .text is
        # None for an empty element; either way there is no URL to offer,
        # so skip the rendition instead of raising AttributeError or
        # emitting a format whose 'url' is None.
        if src is None or not src.text:
            continue
        formats.append({
            'width': int_or_none(rendition.get('width')),
            'height': int_or_none(rendition.get('height')),
            'url': src.text,
        })
    return formats
# NOTE(review): the rows below look like a side-by-side diff of
# _real_extract: where a row contains the same kind of statement twice, the
# first copy appears to be the pre-change text and the second the post-change
# text; rows with a single statement presumably exist on one side only.
# TODO confirm against the real file before treating this as runnable code.
#
# Steps visible in these rows:
#   1. Download the page at `url` and search it for the content id.
#   2. Search the page for either a media `uri` (first copy) or an `episode`
#      id (second copy), each with a fallback regex.
#   3. Rows under "episode page": build a media.mtvnservices.com URL from the
#      episode id, fetch it with `url` as the Referer header, and pull `uri`
#      and `ep` out of the response.
#   4. Build the MediaGenerator list URL from `uri`, then either hand it to
#      _obtain_akamaihd_formats (first copy) or download it as JSON and pass
#      the first rendition's `src` to _extract_m3u8_formats (second copy).
#   5. Return a dict with id, formats, title, description and, on one row,
#      thumbnail.
# `id` shadows the builtin of the same name inside this method.
def _real_extract(self, url): def _real_extract(self, url):
# webpage # webpage
webpage = self._download_webpage(url, 'webpage') webpage = self._download_webpage(url, 'webpage')
# self.to_screen('webpage = %s' % (webpage))
id = self._html_search_regex( id = self._html_search_regex(
r'mgid:arc:content:paramount(?:network|channel)\.(?:it|es):([0-9a-f-]+)', r'mgid:arc:content:web.paramount(?:network|channel|plus)\.(?:it|es|com):([0-9a-f-]+)',
webpage, 'id', fatal=False) \
or \
self._html_search_regex(
r'data-mtv-id="([0-9a-f-]*)"',
webpage, 'id', fatal=False) \
or \
self._html_search_regex(
r'"item_longId" *: *"([0-9a-f-]*)"',
webpage, 'id') webpage, 'id')
self.to_screen('id = %s' % (id)) self.to_screen('id = %s' % (id))
uri = self._html_search_regex( episode = self._html_search_regex(
r'(mgid:arc:content:paramount(?:network|channel)\.(?:it|es):(?:[0-9a-f-]+))', r'mgid:arc:episode:paramount.intl:([0-9a-f-]+)',
webpage, 'uri', fatal=False) \ webpage, 'episode', fatal=False) \
or \ or \
self._html_search_regex( self._html_search_regex(
r'data-mtv-uri="([0-9a-z:\.-]*)"', r'"contentId" *: *"([0-9a-f-]*)"',
webpage, 'uri') webpage, 'episode')
self.to_screen('uri = %s' % (uri)) self.to_screen('episode = %s' % (episode))
title = self._og_search_title(webpage) title = self._og_search_title(webpage)
self.to_screen('title = %s' % (title)) self.to_screen('title = %s' % (title))
# episode page
server = 'https://media.mtvnservices.com'
prefix = '/pmt/e1/access/index.html'
argument1 = 'uri=mgid:arc:episode:paramount.intl:%s' % (episode)
argument2 = 'configtype=edge'
epurl = '%s%s?%s&%s' % (server, prefix, argument1, argument2)
self.to_screen('epurl = %s' % (epurl))
eppage = self._download_webpage(epurl, 'episode url page',
headers = {'Referer': url})
self.to_screen('format list page = %s' % (eppage))
uri = self._html_search_regex(
r'(mgid:arc:video:paramount.intl:(?:[0-9a-f-]+))',
eppage, 'uri')
self.to_screen('uri = %s' % (uri))
ep = self._html_search_regex(
r'&ep=([0-9a-f-]+)"',
eppage, 'ep')
self.to_screen('ep = %s' % (ep))
# list of formats # list of formats
server = 'https://mediautilssvcs-a.akamaihd.net' server = 'https://media-utils.mtvnservices.com'
prefix = '/services/MediaGenerator/' prefix = '/services/MediaGenerator/'
arguments = 'accountOverride=esperanto.mtvi.com' arg1 = 'arcStage=live&accountOverride=intl.mtvi.com&ep=%s' % (ep)
listurl = '%s%s%s?%s' % (server, prefix, uri, arguments) arg2 = '&acceptMethods=hls&format=json&https=true&isEpisode=true'
listurl = '%s%s%s?%s%s' % (server, prefix, uri, arg1, arg2)
self.to_screen('listurl = %s' % (listurl)) self.to_screen('listurl = %s' % (listurl))
formats = self._obtain_akamaihd_formats(listurl)
if self._downloader.params.get('verbose', False): listpage = self._download_json(listurl, 'url list page')
self.to_screen('formats = %s' % (formats)) self.to_screen('listpage = %s' % (listpage))
src = listpage['package']['video']['item'][0]['rendition'][0]['src']
self.to_screen('src = %s' % (src))
return { return {
'id': id, 'id': id,
'formats': formats, 'formats': self._extract_m3u8_formats(src, id),
'title': title, 'title': title,
'description': self._og_search_description(webpage), 'description': self._og_search_description(webpage),
'thumbnail': self._html_search_meta('thumbnail', webpage, fatal=False)
} }