1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-12-23 16:36:48 +00:00

[vrak] Add extractor

This commit is contained in:
Olivier Bilodeau 2016-12-15 20:14:04 -05:00 committed by Sergey M․
parent d02d4fa0a9
commit cbb127568a
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
2 changed files with 69 additions and 0 deletions

View File

@ -1165,6 +1165,7 @@ from .voicerepublic import VoiceRepublicIE
from .voxmedia import VoxMediaIE from .voxmedia import VoxMediaIE
from .vporn import VpornIE from .vporn import VpornIE
from .vrt import VRTIE from .vrt import VRTIE
from .vrak import VrakIE
from .vube import VubeIE from .vube import VubeIE
from .vuclip import VuClipIE from .vuclip import VuClipIE
from .vvvvid import VVVVIDIE from .vvvvid import VVVVIDIE

View File

@ -0,0 +1,68 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .brightcove import BrightcoveNewIE
class VrakIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?target=(?P<id>[0-9\.]+).*'
_TEST = {
'url': 'http://www.vrak.tv/videos?target=1.2240923&filtre=emission&id=1.1806721',
'md5': 'c5d5ce237bca3b1e990ce1b48d1f0948',
'info_dict': {
'id': '5231040869001',
'ext': 'mp4',
'title': 'Référendums américains, animés japonais et hooligans russes',
'upload_date': '20161201',
'description': 'This video file has been uploaded automatically using Oprah. It should be updated with real description soon.',
'timestamp': 1480628425,
'uploader_id': '2890187628001',
}
}
def _real_extract(self, url):
url_id = self._match_id(url)
webpage = self._download_webpage(url, url_id)
result = {}
result['title'] = self._html_search_regex(
r'<h3 class="videoTitle">(.+?)</h3>', webpage, 'title')
# Inspired from BrightcoveNewIE._extract_url()
entries = []
for account_id, player_id, _, video_id in re.findall(
# account_id, player_id and embed from:
# <div class="video-player [...]
# data-publisher-id="2890187628001"
# data-player-id="VkSnGw3cx"
# video id is extracted from weird CMS Java/Javascript notation:
# RW java.lang.String value = '5231040869001';
# Need to use backtrack to pin to a ref since video is in grid
# layout with others
r'''(?sx)
<div[^>]+
data-publisher-id=["\'](\d+)["\']
[^>]*
data-player-id=["\']([^"\']+)["\']
[^>]*
refId&quot;:&quot;([^&]+)&quot;
[^>]*
>.*?
</div>.*?
RW\ java\.lang\.String\ value\ =\ \'brightcove\.article\.\d+\.\3\'
[^>]*
RW\ java\.lang\.String\ value\ =\ \'(\d+)\'
''', webpage):
entries.append(
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
% (account_id, player_id, 'default', video_id))
if entries:
result = self.url_result(entries[0], BrightcoveNewIE.ie_key())
return result