1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2025-07-23 10:38:17 +00:00

Mostly ready for contribution

This commit is contained in:
Olivier Berger 2020-05-01 22:07:23 +02:00
parent 3e70b4f036
commit a7756ff597
2 changed files with 45 additions and 12 deletions

View File

@ -1,5 +1,7 @@
# coding: utf-8 # coding: utf-8
# Contributed by Olivier Berger <olivier.berger@telecom-sudparis.eu>
# Extract material from recordings made inside BigBlueButton # Extract material from recordings made inside BigBlueButton
# BigBlueButton records multiple videos : # BigBlueButton records multiple videos :
@ -8,14 +10,21 @@
# for slides, annotations, etc. the playback app typically renders them on the fly upon playback # for slides, annotations, etc. the playback app typically renders them on the fly upon playback
# so it may not be easy to capture that with youtube-dl # so it may not be easy to capture that with youtube-dl
# To extract a merged video (without the slides), run:
# youtube-dl --merge-output-format mkv -f slides+speaker "https://mybbb.example.com/playback/presentation/2.0/playback.html?meetingId=12345679a50a715e8d6dc692df996dceb8d788f8-1234566973639"
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from .openload import PhantomJSwrapper from ..utils import (
unified_timestamp,
xpath_text,
xpath_with_ns,
)
# TODO : thumbnails _s = lambda p: xpath_with_ns(p, {'svg': 'http://www.w3.org/2000/svg'})
_x = lambda p: xpath_with_ns(p, {'xlink': 'http://www.w3.org/1999/xlink'})
class BigBlueButtonIE(InfoExtractor): class BigBlueButtonIE(InfoExtractor):
_VALID_URL = r'(?P<website>https?://[^/]+)/playback/presentation/2.0/playback.html\?meetingId=(?P<id>[0-9a-f\-]+)' _VALID_URL = r'(?P<website>https?://[^/]+)/playback/presentation/2.0/playback.html\?meetingId=(?P<id>[0-9a-f\-]+)'
@ -39,17 +48,42 @@ class BigBlueButtonIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
m = self._VALID_URL_RE.match(url) m = self._VALID_URL_RE.match(url)
website = m.group('website') website = m.group('website')
#print(video_id)
print(website)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
# print(webpagejs) # Extract basic metadata (more available in metadata.xml)
metadata_url = website + '/presentation/' + video_id + '/metadata.xml'
metadata = self._download_xml(metadata_url, video_id)
# TODO more code goes here, for example ... id = xpath_text(metadata, 'id')
#title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title') meta = metadata.find('./meta')
title = video_id meeting_name = xpath_text(meta, 'meetingName')
start_time = xpath_text(metadata, 'start_time')
title = meeting_name
# This code is currently unused: it is not yet clear what to do with thumbnails
thumbnails = []
images = metadata.find('./playback/extensions/preview/images')
# find() returns None when metadata.xml has no preview images element;
# iterate over nothing in that case instead of raising TypeError.
for image in (images if images is not None else []):
    # An empty <image/> element has text None; skip entries without a URL.
    image_url = (image.text or '').strip()
    if not image_url:
        continue
    # append() the dict itself: `thumbnails += {...}` would extend the list
    # with the dict's keys ('url', 'width', 'height') rather than the entry.
    thumbnails.append({
        'url': image_url,
        'width': image.get('width'),
        'height': image.get('height'),
    })
# This code is mostly useless unless one knows how to process slides
shapes_url = website + '/presentation/' + video_id + '/shapes.svg'
shapes = self._download_xml(shapes_url, video_id)
images = shapes.findall(_s("./svg:image[@class='slide']"))
slides = []
for image in images:
slides.append(image.get(_x('xlink:href')))
# We produce 2 formats :
# - the 'webcams.webm' one, for speaker (can be used for merging its audio)
# - the 'deskshare.webm' one, for screen sharing (can be used
# for merging its video) - it lacks the slides unfortunately
formats = [] formats = []
sources = { 'speaker': '/video/webcams.webm', 'slides': '/deskshare/deskshare.webm' } sources = { 'speaker': '/video/webcams.webm', 'slides': '/deskshare/deskshare.webm' }
@ -65,8 +99,6 @@ class BigBlueButtonIE(InfoExtractor):
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'formats': formats, 'formats': formats,
# 'description': self._og_search_description(webpage), 'timestamp': int(start_time),
# 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), # 'thumbnails': thumbnails
# TODO more properties (see youtube_dl/extractor/common.py)
} }

View File

@ -81,6 +81,7 @@ from .awaan import (
from .azmedien import AZMedienIE from .azmedien import AZMedienIE
from .baidu import BaiduVideoIE from .baidu import BaiduVideoIE
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
from .bbb import BigBlueButtonIE
from .bbc import ( from .bbc import (
BBCCoUkIE, BBCCoUkIE,
BBCCoUkArticleIE, BBCCoUkArticleIE,