2024-03-05 13:11:49 +00:00
# coding: utf-8
2014-06-26 18:30:44 +00:00
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    join_nonempty,
    merge_dicts,
    T,
    traverse_obj,
    url_or_none,
)
2014-06-26 18:30:44 +00:00
2024-03-01 12:24:48 +00:00
class NPOIE(InfoExtractor):
    """Extractor for episodes on npo.nl/start.

    The other extractors in this module subclass it to reuse
    `_extract_formats_by_product_id`.
    """
    IE_NAME = 'npo'
    IE_DESC = 'npo.nl'
    _VALID_URL = r'https?://(?:www\.)?npo\.nl/start/serie/'

    _TESTS = [{
        'url': 'https://npo.nl/start/serie/zembla/seizoen-2015/wie-is-de-mol-2/',
        'md5': 'f9ce9c43cc8bc3b8138df1562b99c379',
        'info_dict': {
            'description': 'Wie is de mol? (2)',
            'duration': 2439,
            'ext': 'm4v',
            'id': 'wie-is-de-mol-2',
            'thumbnail': 'https://assets-start.npo.nl/resources/2023/07/01/e723c3cf-3e42-418a-9ba5-f6dbb64b516a.jpg',
            'title': 'Wie is de mol? (2)'
        }
    }, {
        'url': 'https://npo.nl/start/serie/vpro-tegenlicht/seizoen-11/zwart-geld-de-toekomst-komt-uit-afrika',
        'md5': 'c84d054219c4888ed53b4ee3d01b2d93',
        'info_dict': {
            'id': 'zwart-geld-de-toekomst-komt-uit-afrika',
            'title': 'Zwart geld: de toekomst komt uit Afrika',
            'ext': 'mp4',
            'description': 'Zwart geld: de toekomst komt uit Afrika',
            'thumbnail': 'https://assets-start.npo.nl/resources/2023/06/30/d9879593-1944-4249-990c-1561dac14d8e.jpg',
            'duration': 3000
        },
    }]

    def _get_token(self, video_id):
        """Return the player token the npo.nl API issues for `video_id`."""
        return self._download_json(
            'https://npo.nl/start/api/domain/player-token?productId=%s' % video_id,
            video_id,
            note='Downloading token')['token']

    def _real_extract(self, url):
        """Resolve the episode slug to a productId and build the info dict.

        Raises ExtractorError when the program-detail response carries no
        productId for the slug.
        """
        # Remove /afspelen and/or any trailing `/`s
        url = re.sub(r'/(?:afspelen)?/*$', '', url)
        slug = url.split('/')[-1]
        program_metadata = self._download_json(
            'https://npo.nl/start/api/domain/program-detail',
            slug, query={'slug': slug})
        product_id = traverse_obj(program_metadata, 'productId')
        if not product_id:
            raise ExtractorError('No productId found for slug: %s' % (slug, ))
        formats = self._extract_formats_by_product_id(product_id, slug, url)
        self._sort_formats(formats)
        # The second dict supplies the slug as title/description alongside
        # whatever the metadata lookup produced.
        return merge_dicts(traverse_obj(program_metadata, {
            'title': 'title',
            'description': (('description', ('long', 'short', 'brief')), 'title'),
            'thumbnail': ('images', Ellipsis, 'url', T(url_or_none)),
            'duration': ('durationInSeconds', T(int_or_none)),
        }, get_all=False), {
            'id': slug,
            'formats': formats,
            'title': slug,
            'description': slug,
        })

    def _extract_formats_by_product_id(self, product_id, slug, url=None):
        """Return the list of formats available for `product_id`.

        `slug` is used as the video id for the requests made here and `url`,
        when given, is sent as the referrer URL of the stream-link request.
        A profile whose stream link cannot be obtained is skipped.
        """
        token = self._get_token(product_id)
        formats = []
        for profile in (
            'dash',
            # 'hls' is available too, but implementing it doesn't add much
            # As far as I know 'dash' is always available
        ):
            stream_link = self._download_json(
                'https://prod.npoplayer.nl/stream-link', video_id=slug,
                data=json.dumps({
                    'profileName': profile,
                    'referrerUrl': url or '',
                }).encode('utf8'),
                headers={
                    'Authorization': token,
                    'Content-Type': 'application/json',
                },
                fatal=False,
            )
            stream_url = traverse_obj(stream_link, ('stream', 'streamURL'))
            if not stream_url:
                # The request is non-fatal, so there may be no stream URL;
                # do not hand None to the manifest downloader.
                continue
            formats.extend(self._extract_mpd_formats(stream_url, slug, mpd_id='dash', fatal=False))
        return formats
class BNNVaraIE(NPOIE):
    """Extractor for bnnvara.nl videos.

    Looks up the POMS product id through the site's GraphQL API and hands it
    to the format extraction inherited from NPOIE.
    """
    IE_NAME = 'bnnvara'
    IE_DESC = 'bnnvara.nl'
    _VALID_URL = r'https?://(?:www\.)?bnnvara\.nl/videos/[0-9]*'
    _TESTS = [{
        'url': 'https://www.bnnvara.nl/videos/27455',
        'md5': '392dd367877739e49b9e0a9a550b178a',
        'info_dict': {
            'id': 'VARA_101369808',
            'thumbnail': 'https://media.vara.nl/files/thumbnails/321291_custom_zembla__wie_is_de_mol_680x383.jpg',
            'title': 'Zembla - Wie is de mol?',
            'ext': 'mp4',
        }
    }]

    def _real_extract(self, url):
        """Query the `getMedia` GraphQL operation and build the info dict."""
        url = url.rstrip('/')
        video_id = url.split('/')[-1]
        graphql_query = """query getMedia($id: ID!, $mediaUrl: String, $hasAdConsent: Boolean!, $atInternetId: Int) {
  player(
    id: $id
    mediaUrl: $mediaUrl
    hasAdConsent: $hasAdConsent
    atInternetId: $atInternetId
  ) {
    ... on PlayerSucces {
      brand {
        name
        slug
        broadcastsEnabled
        __typename
      }
      title
      programTitle
      pomsProductId
      broadcasters {
        name
        __typename
      }
      duration
      classifications {
        title
        imageUrl
        type
        __typename
      }
      image {
        title
        url
        __typename
      }
      cta {
        title
        url
        __typename
      }
      genres {
        name
        __typename
      }
      subtitles {
        url
        language
        __typename
      }
      sources {
        name
        url
        ratio
        __typename
      }
      type
      token
      __typename
    }
    ... on PlayerError {
      error
      __typename
    }
    __typename
  }
}"""

        media = self._download_json(
            'https://api.bnnvara.nl/bff/graphql',
            video_id,
            data=json.dumps({
                'operationName': 'getMedia',
                'variables': {
                    'id': video_id,
                    'hasAdConsent': False,
                    'atInternetId': 70
                },
                'query': graphql_query
            }).encode('utf8'),
            headers={
                'Content-Type': 'application/json',
            })

        product_id = traverse_obj(media, ('data', 'player', 'pomsProductId'))
        # The inherited helper is named _extract_formats_by_product_id; the
        # previously used _download_by_product_id is not defined anywhere.
        formats = self._extract_formats_by_product_id(product_id, video_id) if product_id else []
        self._sort_formats(formats)
        return {
            'id': product_id,
            'title': traverse_obj(media, ('data', 'player', 'title')),
            'formats': formats,
            'thumbnail': traverse_obj(media, ('data', 'player', 'image', 'url')),
        }
2024-03-05 12:43:56 +00:00
class ONIE(NPOIE):
    """Extractor for ongehoordnederland.tv pages.

    Collects every product id embedded in the page as `page: '<id>'` and
    gathers the formats of each through the helper inherited from NPOIE.
    """
    IE_NAME = 'on'
    IE_DESC = 'ongehoordnederland.tv'
    # The dot before the TLD is escaped so it only matches a literal '.'.
    _VALID_URL = r'https?://(?:www\.)?ongehoordnederland\.tv/.*'
    _TESTS = [{
        'url': 'https://ongehoordnederland.tv/2024/03/01/korte-clips/heeft-preppen-zin-betwijfel-dat-je-daar-echt-iets-aan-zult-hebben-bij-oorlog-lydia-daniel/',
        'md5': 'a85ebd50fa86fe5cbce654655f7dbb12',
        'info_dict': {
        }
    }]

    def _real_extract(self, url):
        """Download the page and return an info dict keyed by the last URL path segment."""
        video_id = url.rstrip('/').split('/')[-1]
        page = self._download_webpage(url, video_id)
        results = re.findall("page: '(.+)'", page)
        formats = []
        for result in results:
            formats.extend(self._extract_formats_by_product_id(result, video_id))

        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
        }
2024-03-05 12:55:59 +00:00
2024-03-05 13:04:03 +00:00
class ZAPPIE(NPOIE):
    """Extractor for zapp.nl; the last URL path segment is used as product id."""
    IE_NAME = 'zapp'
    IE_DESC = 'zapp.nl'
    # The dot before the TLD is escaped so it only matches a literal '.'.
    _VALID_URL = r'https?://(?:www\.)?zapp\.nl/.*'
    _TESTS = [{
        'url': 'https://www.zapp.nl/programmas/zappsport/gemist/POMS_AT_811523',
        'md5': '9eb2d8b6f88b72b6b986ea2c26a81588',
        'info_dict': {
            'id': 'POMS_AT_811523',
            'title': 'POMS_AT_811523',
        },
    }]

    def _real_extract(self, url):
        """Return an info dict whose id and title are the last URL path segment."""
        video_id = url.rstrip('/').split('/')[-1]
        formats = self._extract_formats_by_product_id(video_id, video_id, url=url)
        # Sort like every other extractor in this module does.
        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
        }
2024-03-06 11:22:27 +00:00
class SchoolTVIE(NPOIE):
    """Extractor for schooltv.nl items.

    Reads the Next.js build id from the item page, fetches the item's JSON
    data and uses its `poms_mid` as the product id.
    """
    IE_NAME = 'schooltv'
    IE_DESC = 'schooltv.nl'
    # The dot before the TLD is escaped so it only matches a literal '.'.
    _VALID_URL = r'https?://(?:www\.)?schooltv\.nl/item/.*'
    _TESTS = [{
        'url': 'https://schooltv.nl/item/zapp-music-challenge-2015-zapp-music-challenge-2015',
        'md5': 'e9ef151c4886994e2bea23593348cb14',
        'info_dict': {
            'id': 'zapp-music-challenge-2015-zapp-music-challenge-2015',
            'title': 'Zapp Music Challenge 2015 - Alain Clark & Yaell',
            'description': "Een nummer schrijven met de super bekende soulzanger en producer Alain Clark? Dat is de uitdaging voor de dertienjarige Yaell uit Delft. En als het dan echt goed is, mag hij het ook nog eens live gaan spelen op de speelplaats bij Giel Beelen! Muziek is heel erg belangrijk in het leven van Yaell. 'Als er geen muziek zou zijn, dan zou ik heel veel niet kunnen.' Hij is dan ook altijd aan het schrijven, vaak over zijn eigen leven. Maar soms is het best lastig om die teksten te verzinnen. Vindt hij de inspiratie om een hit te maken met Alain?"
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for an item page.

        Raises ExtractorError when the item data carries no `poms_mid`.
        """
        video_id = url.rstrip('/').split('/')[-1]
        build_id = self._search_nextjs_data(
            self._download_webpage(url, video_id),
            video_id,
        )['buildId']
        metadata_url = 'https://schooltv.nl/_next/data/%s/video-item/%s.json' % (build_id, video_id)
        # Only accept a dict here: a JSON null or missing key must not break
        # the .get() lookups below.
        metadata = traverse_obj(
            self._download_json(metadata_url, video_id),
            ('pageProps', 'data'), expected_type=dict) or {}
        product_id = metadata.get('poms_mid')
        if not product_id:
            raise ExtractorError('No poms_mid found for item: %s' % (video_id, ))
        formats = self._extract_formats_by_product_id(product_id, video_id)

        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': join_nonempty('title', 'subtitle', from_dict=metadata),
            'description': metadata.get('description') or metadata.get('short_description'),
            'formats': formats,
        }
2024-03-06 11:53:37 +00:00
2024-03-07 15:23:09 +00:00
class NTRSubsiteIE(NPOIE):
    """Shared extraction for sites that embed the product id as `data-mid`."""

    def _real_extract(self, url):
        """Extract formats for the first `data-mid` value found in the page."""
        display_id = url.rstrip('/').rpartition('/')[2]
        webpage, _ = self._download_webpage_handle(url, display_id)
        mids = re.findall(r'data-mid="(.+_.+)"', webpage)
        # Only the first match is used; any later ones are ignored.
        if mids:
            formats = list(self._extract_formats_by_product_id(mids[0], display_id))
        else:
            formats = []
        self._sort_formats(formats)
        info = {'id': display_id, 'title': display_id}
        info['formats'] = formats
        return info
class HetKlokhuisIE(NTRSubsiteIE):
    """hetklokhuis.nl; extraction itself is inherited from NTRSubsiteIE."""
    IE_NAME = 'hetklokhuis'
    IE_DESC = 'hetklokhuis.nl'
    _VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/.*'
    _TESTS = [
        {
            'url': 'https://hetklokhuis.nl/dossier/142/zoek-het-uit/tv-uitzending/2987/aliens',
            'md5': '4664b54ed4e05183b1e4f2f4290d551e',
            'info_dict': {
                'id': 'aliens',
                'title': 'aliens',
            },
        },
    ]
2024-03-06 11:53:37 +00:00
class VPROIE(NPOIE):
    """Extractor for vpro.nl pages that embed the product id as `data-media-id`."""
    IE_NAME = 'vpro'
    IE_DESC = 'vpro.nl'
    # The dot before the TLD is escaped so it only matches a literal '.'.
    _VALID_URL = r'https?://(?:www\.)?vpro\.nl/.*'
    _TESTS = [{
        'url': 'https://www.vpro.nl/programmas/tegenlicht/kijk/afleveringen/2015-2016/offline-als-luxe.html',
        'md5': 'cf302e066b5313cfaf8d5adf50d64f13',
        'info_dict': {
            'id': 'offline-als-luxe.html',
            'title': 'offline-als-luxe.html',
            'ext': 'm4v',
        },
    }]

    def _real_extract(self, url):
        """Extract formats for the first `data-media-id` value found in the page."""
        video_id = url.rstrip('/').split('/')[-1]
        page, _ = self._download_webpage_handle(url, video_id)
        results = re.findall(r'data-media-id="([a-zA-Z0-9_]+)"\s', page)
        formats = []
        # Only the first media id on the page is used.
        if results:
            formats.extend(self._extract_formats_by_product_id(results[0], video_id))

        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
        }
2024-03-07 15:23:09 +00:00
class AndereTijdenIE(NTRSubsiteIE):
    """anderetijden.nl; extraction itself is inherited from NTRSubsiteIE."""
    IE_NAME = 'anderetijden'
    IE_DESC = 'anderetijden.nl'
    _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/.*'
    _TESTS = [
        {
            'url': 'https://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem',
            'md5': '3d607b16e00b459156b4ab6e163dccd7',
            'info_dict': {
                'id': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
                'title': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
            },
        },
    ]