2016-10-02 11:39:18 +00:00
# coding: utf-8
2014-04-07 14:56:15 +00:00
from __future__ import unicode_literals
import base64
2021-03-14 13:49:23 +00:00
import io
2014-08-22 16:40:26 +00:00
import re
2021-03-14 13:49:23 +00:00
import sys
2014-04-07 14:56:15 +00:00
from . common import InfoExtractor
2016-04-23 10:28:49 +00:00
from . . compat import (
2018-01-23 15:23:12 +00:00
compat_b64decode ,
2016-05-03 08:50:16 +00:00
compat_struct_unpack ,
2016-04-23 10:28:49 +00:00
)
2014-04-07 14:56:15 +00:00
from . . utils import (
2022-09-17 18:07:29 +00:00
clean_html ,
2017-09-30 20:21:17 +00:00
determine_ext ,
2015-04-09 12:01:33 +00:00
ExtractorError ,
2015-02-27 11:24:51 +00:00
float_or_none ,
2021-03-14 13:49:23 +00:00
qualities ,
2015-04-04 11:11:55 +00:00
std_headers ,
2014-04-07 14:56:15 +00:00
)
2021-03-14 13:49:23 +00:00
_bytes_to_chr = ( lambda x : x ) if sys . version_info [ 0 ] == 2 else ( lambda x : map ( chr , x ) )
2014-08-22 16:40:26 +00:00
2022-09-17 15:26:02 +00:00
class RTVEPlayIE ( InfoExtractor ) :
IE_NAME = ' rtve.es:play '
IE_DESC = ' RTVE Play '
2022-09-17 18:11:58 +00:00
_VALID_URL = r ' https?://(?:www \ .)?rtve \ .es/(?P<kind>playz?/(?:audios|videos)|filmoteca)/[^/]+/[^/]+/(?P<id> \ d+) '
2014-04-07 14:56:15 +00:00
2014-08-22 16:40:26 +00:00
_TESTS = [ {
2014-04-07 14:56:15 +00:00
' url ' : ' http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/ ' ,
2022-09-17 16:13:44 +00:00
' md5 ' : ' 2c70aacf8a415d1b4e7fcc0525951162 ' ,
2014-04-07 14:56:15 +00:00
' info_dict ' : {
' id ' : ' 2491869 ' ,
' ext ' : ' mp4 ' ,
2022-09-17 18:07:29 +00:00
' title ' : ' Final de la Swiss Cup masculina: España-Suecia ' ,
' description ' : ' Swiss Cup masculina, Final: España-Suecia. ' ,
2015-02-27 11:24:51 +00:00
' duration ' : 5024.566 ,
2021-03-14 13:49:23 +00:00
' series ' : ' Balonmano ' ,
2014-04-07 14:56:15 +00:00
} ,
2021-03-14 13:49:23 +00:00
' expected_warnings ' : [ ' Failed to download MPD manifest ' , ' Failed to download m3u8 information ' ] ,
2014-08-22 16:40:26 +00:00
} , {
' note ' : ' Live stream ' ,
' url ' : ' http://www.rtve.es/alacarta/videos/television/24h-live/1694255/ ' ,
' info_dict ' : {
' id ' : ' 1694255 ' ,
2021-03-14 13:49:23 +00:00
' ext ' : ' mp4 ' ,
' title ' : ' re:^24H LIVE [0-9] {4} -[0-9] {2} -[0-9] {2} [0-9] {2} :[0-9] {2} $ ' ,
2022-09-17 18:07:29 +00:00
' description ' : ' 24H LIVE ' ,
2021-03-14 13:49:23 +00:00
' is_live ' : True ,
} ,
' params ' : {
' skip_download ' : ' live stream ' ,
2015-01-30 22:05:06 +00:00
} ,
2017-09-30 20:21:17 +00:00
} , {
' url ' : ' http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/ ' ,
2022-09-17 16:13:44 +00:00
' md5 ' : ' 30b8827cba25f39d1af5a7c482cc8ac5 ' ,
2017-09-30 20:21:17 +00:00
' info_dict ' : {
' id ' : ' 4236788 ' ,
' ext ' : ' mp4 ' ,
2022-09-17 18:07:29 +00:00
' title ' : ' Capítulo 104 ' ,
' description ' : ' md5:caae29ae04291875e611dd667fe84641 ' ,
2017-09-30 20:21:17 +00:00
' duration ' : 3222.0 ,
} ,
2021-03-14 13:49:23 +00:00
' expected_warnings ' : [ ' Failed to download MPD manifest ' , ' Failed to download m3u8 information ' ] ,
2015-01-30 22:12:53 +00:00
} , {
' url ' : ' http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve ' ,
' only_matching ' : True ,
2016-05-21 15:37:35 +00:00
} , {
' url ' : ' http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/ ' ,
' only_matching ' : True ,
2022-09-17 18:11:58 +00:00
} , {
' url ' : ' http://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/ ' ,
' md5 ' : ' ae06d27bff945c4e87a50f89f6ce48ce ' ,
' info_dict ' : {
' id ' : ' 5889192 ' ,
' ext ' : ' mp3 ' ,
' title ' : ' Códigos informáticos ' ,
' description ' : ' md5:72b0d7c1ca20fd327bdfff7ac0171afb ' ,
' thumbnail ' : r ' re:https?://.+/1598856591583.jpg ' ,
' duration ' : 349.440 ,
} ,
2014-08-22 16:40:26 +00:00
} ]
2014-04-07 14:56:15 +00:00
2015-04-04 11:11:55 +00:00
def _real_initialize(self):
    """Download the 'manager' value and keep it in ``self._manager``.

    The request URL ends with the base64-encoded default User-Agent
    header; the ``manager`` field of the JSON response is stored for later
    use when building the thumbnail URL.
    """
    encoded_user_agent = base64.b64encode(
        std_headers['User-Agent'].encode('utf-8'))
    manager_info = self._download_json(
        'http://www.rtve.es/odin/loki/' + encoded_user_agent.decode('utf-8'),
        None, 'Fetching manager info')
    self._manager = manager_info['manager']
@staticmethod
def _decrypt_url(png):
    """Yield ``(quality, url)`` pairs hidden in the text chunks of a PNG.

    :param png: base64-encoded PNG data.
    :returns: a generator of ``(quality, url)`` tuples, one per ``tEXt``
        chunk.  ``quality`` is an empty string when the chunk text carries
        no ``%%`` separator.

    Each ``tEXt`` payload (NUL bytes removed) has the form
    ``<alphabet>#[<quality>%%]<url digits>``.  The alphabet is sampled from
    its raw data and the URL is rebuilt from two-digit indexes into it.
    """
    # Drop the 8-byte PNG signature.  What follows is a sequence of chunks:
    # 4-byte big-endian length, 4-byte type, payload, 4-byte CRC.
    stream = io.BytesIO(compat_b64decode(png)[8:])
    while True:
        length_field = stream.read(4)
        if len(length_field) < 4:
            # Truncated data without an IEND chunk: stop cleanly instead of
            # letting the struct unpacking fail on a short buffer.
            break
        length = compat_struct_unpack('!I', length_field)[0]
        chunk_type = stream.read(4)
        if chunk_type == b'IEND':
            break
        data = stream.read(length)
        if chunk_type == b'tEXt':
            alphabet_data, text = data.replace(b'\0', b'').split(b'#')
            components = text.split(b'%%')
            if len(components) < 2:
                # No quality label present: use an empty one.
                components.insert(0, b'')
            quality, url_data = components

            # Sample the alphabet: after each kept character, skip a number
            # of characters that cycles through 1, 2, 3, 0.
            alphabet = []
            step = 0
            to_skip = 0
            for char in _bytes_to_chr(alphabet_data):
                if to_skip == 0:
                    alphabet.append(char)
                    to_skip = step = (step + 1) % 4
                else:
                    to_skip -= 1

            # Rebuild the URL: each output character is alphabet[tens * 10 +
            # units].  The tens digit comes first, then filler characters
            # (their count cycles through 3, 0, 1, 2), then the units digit.
            url_chars = []
            have_tens = False
            filler = 3
            decoded = 1
            for digit in _bytes_to_chr(url_data):
                if not have_tens:
                    index = int(digit) * 10
                    have_tens = True
                elif filler == 0:
                    index += int(digit)
                    url_chars.append(alphabet[index])
                    filler = (decoded + 3) % 4
                    have_tens = False
                    decoded += 1
                else:
                    filler -= 1

            yield quality.decode(), ''.join(url_chars)
        stream.read(4)  # CRC
def _extract_png_formats(self, video_id):
    """Build the format list from the obfuscated thumbnail PNG.

    Downloads the PNG for ``video_id`` (the URL includes ``self._manager``),
    decodes the ``(quality, url)`` pairs with ``_decrypt_url`` and expands
    HLS / DASH manifests into their individual formats.  Returns the sorted
    list of format dicts.
    """
    thumbnail_url = 'http://ztnr.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (
        self._manager, video_id)
    png = self._download_webpage(
        thumbnail_url, video_id, 'Downloading url information',
        query={'q': 'v2'})
    quality_rank = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])

    formats = []
    for quality, media_url in self._decrypt_url(png):
        ext = determine_ext(media_url)
        if ext == 'm3u8':
            # Manifest failures are non-fatal: other entries may still work.
            found = self._extract_m3u8_formats(
                media_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
        elif ext == 'mpd':
            found = self._extract_mpd_formats(
                media_url, video_id, 'dash', fatal=False)
        else:
            # Direct media URL, ranked by its quality label.
            found = [{
                'format_id': quality,
                'quality': quality_rank(quality),
                'url': media_url,
            }]
        formats.extend(found)

    self._sort_formats(formats)
    return formats
2015-04-04 11:11:55 +00:00
2014-04-07 14:56:15 +00:00
def _real_extract ( self , url ) :
2022-09-17 18:11:58 +00:00
groups = re . match ( self . _VALID_URL , url ) . groupdict ( )
is_audio = groups . get ( ' kind ' ) == ' play/audios '
return self . _real_extract_from_id ( groups [ ' id ' ] , is_audio )
2022-09-17 17:08:04 +00:00
2022-09-17 18:11:58 +00:00
def _real_extract_from_id(self, video_id, is_audio=False):
    """Return the info dict for the media item with the given ID.

    :param video_id: numeric ID of the item, as a string.
    :param is_audio: query the ``audios`` API instead of ``videos``.
    :raises ExtractorError: when the item's publication state is ``DESPU``.
    """
    api_kind = 'audios' if is_audio else 'videos'
    response = self._download_json(
        'http://www.rtve.es/api/%s/%s.json' % (api_kind, video_id),
        video_id)
    info = response['page']['items'][0]

    pub_state = info.get('pubState') or {}
    if pub_state.get('code') == 'DESPU':
        raise ExtractorError('The video is no longer available', expected=True)

    title = info['title'].strip()
    formats = self._extract_png_formats(video_id)

    # Subtitles are only looked up when the API advertises a subtitle file.
    sbt_file = info.get('sbtFile')
    subtitles = self.extract_subtitles(video_id, sbt_file) if sbt_file else None

    is_live = info.get('consumption') == 'live'
    if is_live:
        title = self._live_title(title)

    program_info = info.get('programInfo') or {}

    return {
        'id': video_id,
        'title': title,
        'formats': formats,
        'url': info.get('htmlUrl'),
        'description': clean_html(info.get('description')),
        'thumbnail': info.get('thumbnail'),
        'subtitles': subtitles,
        # Scale of 1000 - presumably milliseconds to seconds; confirm
        # against the API.
        'duration': float_or_none(info.get('duration'), 1000),
        'is_live': is_live,
        'series': program_info.get('title'),
    }
2015-02-23 20:52:07 +00:00
def _get_subtitles(self, video_id, sub_file):
    """Map each subtitle language to a single-entry list with its VTT URL.

    The subtitle listing is fetched from ``sub_file`` with ``.json``
    appended; every item contributes its ``lang`` and ``src`` fields.
    """
    response = self._download_json(
        sub_file + '.json', video_id, 'Downloading subtitles info')
    subtitles = {}
    for item in response['page']['items']:
        subtitles[item['lang']] = [{'ext': 'vtt', 'url': item['src']}]
    return subtitles
2015-03-15 21:17:40 +00:00
2022-09-17 15:26:02 +00:00
class RTVEInfantilIE(RTVEPlayIE):
    # Extractor for rtve.es/infantil/serie/... pages.  Only the identifying
    # attributes, the URL pattern and the test fixture are declared here; no
    # method is overridden, so extraction is inherited from RTVEPlayIE.
    IE_NAME = 'rtve.es:infantil'
    IE_DESC = 'RTVE infantil'

    # The trailing numeric path component is the media ID.
    _VALID_URL = (
        r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/'
        r'(?P<id>[0-9]+)/')

    _TESTS = [{
        'url': 'https://www.rtve.es/infantil/serie/dino-ranch/video/pequeno-gran-ayudante/6693248/',
        'md5': '06d3f57eec593ad93fe9dcf079fbd940',
        'info_dict': {
            'id': '6693248',
            'ext': 'mp4',
            'title': 'Un pequeño gran ayudante',
            'description': 'md5:144ca351e31f9ee99a637ab9fc2787d5',
            'thumbnail': r're:https?://.+/1663318364501\.jpg',
            'duration': 691.44,
        },
        'expected_warnings': [
            'Failed to download MPD manifest',
            'Failed to download m3u8 information',
        ],
    }]
2015-03-15 20:45:14 +00:00
2022-09-17 15:26:02 +00:00
class RTVELiveIE ( RTVEPlayIE ) :
2014-08-22 16:40:26 +00:00
IE_NAME = ' rtve.es:live '
IE_DESC = ' RTVE.es live streams '
2022-09-17 15:27:57 +00:00
_VALID_URL = r ' https?://(?:www \ .)?rtve \ .es/play/videos/directo/(?P<id>.+) '
2014-08-22 16:40:26 +00:00
_TESTS = [ {
2022-09-17 16:13:44 +00:00
' url ' : ' https://www.rtve.es/play/videos/directo/la-1/ ' ,
2014-08-22 16:40:26 +00:00
' info_dict ' : {
2022-09-17 16:13:44 +00:00
' id ' : ' 1688877 ' ,
2016-02-29 19:57:26 +00:00
' ext ' : ' mp4 ' ,
2021-03-14 13:49:23 +00:00
' title ' : ' re:^La 1 [0-9] {4} -[0-9] {2} -[0-9] {2} [0-9] {2} :[0-9] {2} $ ' ,
2022-09-17 18:07:29 +00:00
' description ' : ' La 1 ' ,
2014-08-22 16:40:26 +00:00
} ,
' params ' : {
' skip_download ' : ' live stream ' ,
}
2022-09-17 16:13:44 +00:00
} , {
' url ' : ' https://www.rtve.es/play/videos/directo/canales-lineales/la-1/ ' ,
' info_dict ' : {
' id ' : ' 1688877 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' re:^La 1 [0-9] {4} -[0-9] {2} -[0-9] {2} [0-9] {2} :[0-9] {2} $ ' ,
2022-09-17 18:07:29 +00:00
' description ' : ' La 1 ' ,
2022-09-17 16:13:44 +00:00
} ,
' params ' : {
' skip_download ' : ' live stream ' ,
}
} , {
' url ' : ' https://www.rtve.es/play/videos/directo/canales-lineales/capilla-ardiente-isabel-westminster/10886/ ' ,
' info_dict ' : {
' id ' : ' 1938028 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' re:^Mas24 - 1 [0-9] {4} -[0-9] {2} -[0-9] {2} [0-9] {2} :[0-9] {2} $ ' ,
2022-09-17 18:07:29 +00:00
' description ' : ' Mas24 - 1 ' ,
2022-09-17 16:13:44 +00:00
} ,
' params ' : {
' skip_download ' : ' live stream ' ,
}
2014-08-22 16:40:26 +00:00
} ]
def _real_extract(self, url):
    """Resolve a live-channel page to its asset ID and extract that asset.

    The page is downloaded and the ``idAsset`` value is read from the
    ``data-setup`` attribute of the element whose class contains
    ``videoPlayer``; extraction then continues by ID.
    """
    webpage = self._download_webpage(url, self._match_id(url))
    # The JSON inside data-setup may be written with literal quotes or with
    # HTML-escaped ones (&quot;), so both spellings are accepted around the
    # key and the value.
    asset_id = self._search_regex(
        r'class=["\'].*?\bvideoPlayer\b.*?["\'][^>]+data-setup=[^>]+?'
        r'(?:"|&quot;)idAsset(?:"|&quot;)\s*:\s*(?:"|&quot;)(\d+)(?:"|&quot;)',
        webpage, 'internal video ID')
    return self._real_extract_from_id(asset_id)
2016-07-13 19:02:34 +00:00
class RTVETelevisionIE ( InfoExtractor ) :
IE_NAME = ' rtve.es:television '
2022-09-17 15:27:03 +00:00
# https://www.rtve.es/SECTION/YYYYMMDD/CONTENT_SLUG/CONTENT_ID.shtml
_VALID_URL = r ' https?://(?:www \ .)?rtve \ .es/[^/]+/ \ d {8} /[^/]+/(?P<id> \ d+) \ .shtml '
2016-07-13 19:02:34 +00:00
2022-09-17 16:13:44 +00:00
_TESTS = [ {
' url ' : ' https://www.rtve.es/television/20220916/destacados-festival-san-sebastian-rtve-play/2395620.shtml ' ,
' info_dict ' : {
' id ' : ' 6668919 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Las películas del Festival de San Sebastián en RTVE Play ' ,
2022-09-17 18:07:29 +00:00
' description ' : ' El \xa0 Festival de San Sebastián vuelve a llenarse de artistas. Y en su honor, \xa0 RTVE Play \xa0 destacará cada viernes una \xa0 película galardonada \xa0 con la \xa0 Concha de Oro \xa0 en su catálogo. ' ,
2022-09-17 16:13:44 +00:00
' duration ' : 20.048 ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} , {
' url ' : ' https://www.rtve.es/noticias/20220917/penelope-cruz-san-sebastian-premio-nacional/2402565.shtml ' ,
2016-07-13 19:02:34 +00:00
' info_dict ' : {
2022-09-17 16:13:44 +00:00
' id ' : ' 6694087 ' ,
2016-07-13 19:02:34 +00:00
' ext ' : ' mp4 ' ,
2022-09-17 16:13:44 +00:00
' title ' : ' Penélope Cruz recoge el Premio Nacional de Cinematografía: " No dejen nunca de proteger nuestro cine " ' ,
2022-09-17 18:07:29 +00:00
' description ' : ' md5:eda9e6baa78dbbbcc7708c0cc8150a91 ' ,
2022-09-17 16:13:44 +00:00
' duration ' : 388.2 ,
2016-07-13 19:02:34 +00:00
} ,
' params ' : {
' skip_download ' : True ,
} ,
2022-09-17 16:13:44 +00:00
} , {
' url ' : ' https://www.rtve.es/deportes/20220917/motogp-bagnaia-pole-marquez-decimotercero-motorland-aragon/2402566.shtml ' ,
' info_dict ' : {
' id ' : ' 6694142 ' ,
' ext ' : ' mp4 ' ,
' title ' : " Bagnaia logra su quinta ' pole ' del año y Márquez partirá decimotercero " ,
2022-09-17 18:07:29 +00:00
' description ' : ' md5:07e2ccb983a046cb42f896cce225f0a7 ' ,
2022-09-17 16:13:44 +00:00
' duration ' : 153.44 ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} , {
' url ' : ' https://www.rtve.es/playz/20220807/covaleda-fest-final/2394809.shtml ' ,
' info_dict ' : {
' id ' : ' 6665408 ' ,
' ext ' : ' mp4 ' ,
2022-09-17 18:07:29 +00:00
' title ' : ' Covaleda Fest (Soria) - Día 3 con Marc Seguí y Paranoid 1966 ' ,
' description ' : ' Festivales Playz viaja a Covaleda, Soria, para contarte todo lo que sucede en el Covaleda Fest. Entrevistas, challenges a los artistas, juegos... Khan, Adriana Jiménez y María García no dejarán pasar ni una. ¡No te lo pierdas! ' ,
2022-09-17 16:13:44 +00:00
' duration ' : 12009.92 ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} ]
2016-07-13 19:02:34 +00:00
def _real_extract(self, url):
    """Find the RTVE Play URL embedded in an article page and defer to it.

    :raises ExtractorError: (expected) when the page embeds no such URL.
    """
    page_id = self._match_id(url)
    webpage = self._download_webpage(url, page_id)
    # The URL sits in JSON embedded after the data-location attribute.  That
    # JSON may use literal quotes or HTML-escaped ones (&quot;), so both are
    # accepted; the capture ends at the closing quote or at the '&' that
    # starts the next entity or query separator.
    play_url = self._search_regex(
        r'data-location="alacarta_videos"[^<]+url(?:"|&quot;):(?:"|&quot;)'
        r'(https?://www\.rtve\.es/play.+?)(?:"|&)',
        webpage, 'alacarta url', default=None)
    if play_url is None:
        raise ExtractorError(
            'The webpage doesn\'t contain any video', expected=True)
    return self.url_result(play_url, ie=RTVEPlayIE.ie_key())