mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-12-23 16:36:48 +00:00
[bbc] Extract legacy playlist embedded media
This commit is contained in:
parent
f790c43f6e
commit
e6174ee975
@ -29,6 +29,14 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
|
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
_MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
|
||||||
|
_EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
|
||||||
|
|
||||||
|
_NAMESPACES = (
|
||||||
|
_MEDIASELECTION_NS,
|
||||||
|
_EMP_PLAYLIST_NS,
|
||||||
|
)
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
|
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
|
||||||
@ -194,6 +202,7 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
|
|
||||||
def _extract_connection(self, connection, programme_id):
|
def _extract_connection(self, connection, programme_id):
|
||||||
formats = []
|
formats = []
|
||||||
|
kind = connection.get('kind')
|
||||||
protocol = connection.get('protocol')
|
protocol = connection.get('protocol')
|
||||||
supplier = connection.get('supplier')
|
supplier = connection.get('supplier')
|
||||||
if protocol == 'http':
|
if protocol == 'http':
|
||||||
@ -219,7 +228,7 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': href,
|
'url': href,
|
||||||
'format_id': supplier,
|
'format_id': supplier or kind or protocol,
|
||||||
})
|
})
|
||||||
elif protocol == 'rtmp':
|
elif protocol == 'rtmp':
|
||||||
application = connection.get('application', 'ondemand')
|
application = connection.get('application', 'ondemand')
|
||||||
@ -239,16 +248,24 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _extract_items(self, playlist):
|
def _extract_items(self, playlist):
|
||||||
return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
|
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
||||||
|
|
||||||
|
def _findall_ns(self, element, xpath):
|
||||||
|
elements = []
|
||||||
|
for ns in self._NAMESPACES:
|
||||||
|
elements.extend(element.findall(xpath % ns))
|
||||||
|
return elements
|
||||||
|
|
||||||
def _extract_medias(self, media_selection):
|
def _extract_medias(self, media_selection):
|
||||||
error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
|
error = media_selection.find('./{%s}error' % self._MEDIASELECTION_NS)
|
||||||
|
if error is None:
|
||||||
|
media_selection.find('./{%s}error' % self._EMP_PLAYLIST_NS)
|
||||||
if error is not None:
|
if error is not None:
|
||||||
raise BBCCoUkIE.MediaSelectionError(error.get('id'))
|
raise BBCCoUkIE.MediaSelectionError(error.get('id'))
|
||||||
return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
|
return self._findall_ns(media_selection, './{%s}media')
|
||||||
|
|
||||||
def _extract_connections(self, media):
|
def _extract_connections(self, media):
|
||||||
return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')
|
return self._findall_ns(media, './{%s}connection')
|
||||||
|
|
||||||
def _extract_video(self, media, programme_id):
|
def _extract_video(self, media, programme_id):
|
||||||
formats = []
|
formats = []
|
||||||
@ -262,13 +279,14 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
conn_formats = self._extract_connection(connection, programme_id)
|
conn_formats = self._extract_connection(connection, programme_id)
|
||||||
for format in conn_formats:
|
for format in conn_formats:
|
||||||
format.update({
|
format.update({
|
||||||
'format_id': '%s_%s' % (service, format['format_id']),
|
|
||||||
'width': width,
|
'width': width,
|
||||||
'height': height,
|
'height': height,
|
||||||
'vbr': vbr,
|
'vbr': vbr,
|
||||||
'vcodec': vcodec,
|
'vcodec': vcodec,
|
||||||
'filesize': file_size,
|
'filesize': file_size,
|
||||||
})
|
})
|
||||||
|
if service:
|
||||||
|
format['format_id'] = '%s_%s' % (service, format['format_id'])
|
||||||
formats.extend(conn_formats)
|
formats.extend(conn_formats)
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
@ -383,7 +401,7 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
url, playlist_id, 'Downloading legacy playlist XML')
|
url, playlist_id, 'Downloading legacy playlist XML')
|
||||||
|
|
||||||
def _extract_from_legacy_playlist(self, playlist, playlist_id):
|
def _extract_from_legacy_playlist(self, playlist, playlist_id):
|
||||||
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS)
|
||||||
if no_items is not None:
|
if no_items is not None:
|
||||||
reason = no_items.get('reason')
|
reason = no_items.get('reason')
|
||||||
if reason == 'preAvailability':
|
if reason == 'preAvailability':
|
||||||
@ -400,8 +418,8 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
kind = item.get('kind')
|
kind = item.get('kind')
|
||||||
if kind != 'programme' and kind != 'radioProgramme':
|
if kind != 'programme' and kind != 'radioProgramme':
|
||||||
continue
|
continue
|
||||||
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
|
||||||
description_el = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary')
|
description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
|
||||||
description = description_el.text if description_el else None
|
description = description_el.text if description_el else None
|
||||||
|
|
||||||
def get_programme_id(item):
|
def get_programme_id(item):
|
||||||
@ -411,16 +429,18 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
if value and re.match(r'^[pb][\da-z]{7}$', value):
|
if value and re.match(r'^[pb][\da-z]{7}$', value):
|
||||||
return value
|
return value
|
||||||
get_from_attributes(item)
|
get_from_attributes(item)
|
||||||
mediator = item.find('./{http://bbc.co.uk/2008/emp/playlist}mediator')
|
mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS)
|
||||||
if mediator is not None:
|
if mediator is not None:
|
||||||
return get_from_attributes(mediator)
|
return get_from_attributes(mediator)
|
||||||
|
|
||||||
programme_id = get_programme_id(item)
|
programme_id = get_programme_id(item)
|
||||||
duration = int_or_none(item.get('duration'))
|
duration = int_or_none(item.get('duration'))
|
||||||
# TODO: programme_id can be None and media items can be incorporated right inside
|
|
||||||
# playlist's item (e.g. http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
|
if programme_id:
|
||||||
# as f4m and m3u8
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
formats, subtitles = self._download_media_selector(programme_id)
|
else:
|
||||||
|
formats, subtitles = self._process_media_selector(item, playlist_id)
|
||||||
|
programme_id = playlist_id
|
||||||
|
|
||||||
return programme_id, title, description, duration, formats, subtitles
|
return programme_id, title, description, duration, formats, subtitles
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user