1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2025-02-07 02:29:48 +00:00

[BrainPOP] Add new extractor

This commit is contained in:
Nehal Patel 2016-07-06 23:36:29 -05:00
parent 7cfc1e2a10
commit 45abe2051d
2 changed files with 48 additions and 0 deletions

View File

@ -0,0 +1,47 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class BrainPOPIE(InfoExtractor):
    """Extractor for movie pages hosted on brainpop.com.

    The page is downloaded once and every piece of information (media path,
    CDN prefixes, access token, title, thumbnail, description) is pulled out
    of it with regular expressions.
    """

    # The optional subdomain is restricted to characters other than "/" so
    # that "brainpop.com" must be part of the host, not of another site's
    # path. The id is everything after the host, exactly as before.
    _VALID_URL = r'https?://(?:[^/]+\.)?brainpop\.com/(?P<id>[^\r\n]+)'
    _TEST = {
        'url': 'https://www.brainpop.com/english/freemovies/williamshakespeare/',
        'md5': '676d936271b628dc05e4cec377751919',
        'info_dict': {
            'id': 'english/freemovies/williamshakespeare/',
            'ext': 'mp4',
            'title': 'William Shakespeare - BrainPOP',
            # Raw string: "\." is a regex escape, not a Python string escape.
            'thumbnail': r're:^https?://.*\.png$',
            'description': 'He could do comedies, tragedies, histories and poetry. Learn about the greatest playwright in the history of the English language!',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the movie page at *url*.

        The media URL is built as ``movie_cdn_path + mp4 path + '?' +
        ec_token``. The title and the three media URL components are
        mandatory, so the extraction fails if any of them is not found.
        Thumbnail and description are optional and are set to ``None`` when
        they cannot be found.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        self.report_extraction(video_id)

        # Mandatory pieces of the media URL. The quoted values are matched
        # up to the first closing quote so that other quoted values on the
        # same line are not swallowed.
        ec_token = self._html_search_regex(
            r'ec_token : \'([^\']+)\'', webpage, 'token')
        movie_cdn_path = self._html_search_regex(
            r'movie_cdn_path : \'([^\']+)\'', webpage, 'cdn path')
        mp4 = self._html_search_regex(
            r'mp4":"([^"]*)', webpage, 'mp4')
        # Backslashes are stripped from the matched path (presumably
        # JSON-escaped slashes such as "\/" - confirm against a live page).
        video_url = movie_cdn_path + mp4.replace('\\', '') + '?' + ec_token

        title = self._html_search_regex(
            r'type":"Movie","name":"([^"]*)"', webpage, 'title')

        # Optional metadata: a miss must not abort the whole extraction.
        thumbnail_cdn = self._html_search_regex(
            r"'cdn_path' : '([^']*)'", webpage, 'thumbnail cdn',
            fatal=False)
        thumbnail_image = self._html_search_regex(
            r'type":"Movie","name":"[^"]*","image":"([^"]*)"',
            webpage, 'thumbnail', fatal=False)
        thumbnail = None
        if thumbnail_cdn is not None and thumbnail_image is not None:
            thumbnail = thumbnail_cdn + thumbnail_image.replace('\\', '')

        description = self._html_search_regex(
            r'type":"Movie","name":"[^"]*","image":"[^"]*","description":"([^"]*)"',
            webpage, 'description', fatal=False)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
            'description': description,
        }

View File

@ -93,6 +93,7 @@ from .bokecc import BokeCCIE
from .bpb import BpbIE from .bpb import BpbIE
from .br import BRIE from .br import BRIE
from .bravotv import BravoTVIE from .bravotv import BravoTVIE
from .brainpop import BrainPOPIE
from .breakcom import BreakIE from .breakcom import BreakIE
from .brightcove import ( from .brightcove import (
BrightcoveLegacyIE, BrightcoveLegacyIE,