mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-02-07 02:29:48 +00:00
![John Hawkinson](/assets/img/avatar_default.png)
Video IDs should be based on the unique ID of the video, not the meeting ID of the parent page that links to the media page. Unfortunately we don't learn the media ID until after downloading the first page.
97 lines
3.8 KiB
Python
97 lines
3.8 KiB
Python
# coding: utf-8
|
|
from __future__ import unicode_literals
|
|
from ..utils import smuggle_url
|
|
|
|
import re
|
|
|
|
from .common import InfoExtractor
|
|
from ..compat import compat_urlparse
|
|
from .jwplatform import JWPlatformBaseIE
|
|
|
|
# IQM2 aka Accela is a municipal meeting management platform that
|
|
# (among other things) stores livestreamed video from municipal
|
|
# meetings. In some cases (e.g. cambridgema.iqm2.com), after a hefty
|
|
# (several-hour) processing time, that video is available in easily
|
|
# downloadable form from their web portal, but prior to that, the
|
|
# video can only be watched in realtime through JWPlayer. Other
|
|
# (somervillecityma.iqm2.com) instances don't seem to ever offer a
|
|
# downloadable form. This extractor is designed to download the
|
|
# realtime video without the download links being available. For more
|
|
# info on Accela, see:
|
|
# http://www.iqm2.com/About/Accela.aspx
|
|
# http://www.accela.com/
|
|
# https://github.com/Accela-Inc/leg-man-api-docs
|
|
|
|
# This processing makes it hard to test since there's only a narrow
|
|
# window when it matters. After that the extractor finds links to
|
|
# the processed video instead.
|
|
|
|
# No metadata is retrieved, as that would require finding a metadata
|
|
# URL and retrieving a 3rd HTTP resource.
|
|
|
|
# Contributed by John Hawkinson <jhawk@mit.edu>, 6 Oct 2016.
|
|
|
|
class IQM2IE(JWPlatformBaseIE):
    """Extractor for meeting video embedded in IQM2 "Citizens" pages.

    The URL handed to the extractor identifies a meeting (``MeetingID``).
    That page embeds a second page identified by ``MediaID``, and the
    second page carries the ``SetupJWPlayer(...)`` call describing the
    media.  The media ID, not the meeting ID, is used as the video ID, so
    the meeting page has to be downloaded before the video ID is known.
    """

    # We commonly see both iqm2.com and IQM2.com, hence the (?i) flag.
    _VALID_URL = r'(?i)https?://(?:\w+\.)?iqm2\.com/Citizens/\w+\.aspx\?.*MeetingID=(?P<id>[0-9]+)'
    _TESTS = [{
        # This is a "realtime" case
        'url': 'http://somervillecityma.iqm2.com/Citizens/SplitView.aspx?Mode=Video&MeetingID=2308',
        'md5': '9ef458ff6c93f8b9323cf79db4ede9cf',
        'info_dict': {
            'id': '70472',
            'ext': 'mp4',
            'title': 'City of Somerville, Massachusetts',
        },
    }, {
        # This is a "postprocessed" case
        'url': 'http://cambridgema.iqm2.com/Citizens/SplitView.aspx?Mode=Video&MeetingID=1679#',
        'md5': '478ea30eee1966f7be0d8dd623122148',
        'info_dict': {
            'id': '1563',
            'ext': 'mp4',
            'title': 'Cambridge, MA',
        },
    }, {
        'url': 'https://CambridgeMA.IQM2.com/Citizens/VideoMain.aspx?MeetingID=1679',
        'only_matching': True,
    }]

    def _find_jwplayer_data(self, webpage):
        """Return the raw options string passed to ``SetupJWPlayer``.

        Looks for ``SetupJWPlayer(eval('<options>')`` in *webpage* and
        returns the text between the quotes, or ``None`` when the page
        contains no such call.
        """
        mobj = re.search(
            r'SetupJWPlayer\(eval\(\'(?P<options>[^)]+)\'\)', webpage)
        if not mobj:
            return None
        return mobj.group('options')

    def _real_extract(self, url):
        """Build the info dict for the media embedded in a meeting page.

        Downloads the meeting page at *url*, follows the embedded
        ``VideoPanelInner`` source to the media page, and extracts the
        JWPlayer data from there.  The title comes from the media page's
        Open Graph title, then its ``<title>`` element, and finally the
        literal ``'video'``.
        """
        parent_id = self._match_id(url)
        webpage = self._download_webpage(url, parent_id)

        # Take, e.g.
        #   http://cambridgema.iqm2.com/Citizens/SplitView.aspx?Mode=Video&MeetingID=1679
        # and look for
        #   <div id="VideoPanel" class="LeftTopContent">
        #   <div id="VideoPanelInner" ... src="/Citizens/VideoScreen.aspx?MediaID=1563&Frame=SplitView">
        # and then parse the canonicalized src element
        inner_url_rel = self._html_search_regex(
            r'<div id="VideoPanelInner".*src="([^"]+)"',
            webpage, 'url')

        # The src attribute is site-relative; resolve it against the page URL.
        inner_url = compat_urlparse.urljoin(url, inner_url_rel)

        # Use _search_regex rather than a bare re.match(...).group(): an
        # inner URL without a MediaID then goes through the extractor's
        # regular failure reporting instead of raising AttributeError on a
        # None match object.
        video_id = self._search_regex(
            r'(?i)https?://(?:\w+\.)?iqm2\.com/Citizens/\w+\.aspx\?.*MediaID=(?P<id>[0-9]+)',
            inner_url, 'media id', group='id')
        webpage = self._download_webpage(inner_url, video_id)

        # The JWPlayer options carry no title, so do not require one here;
        # it is filled in from the page below.
        info_dict = self._extract_jwplayer_data(
            webpage, video_id, require_title=False)

        video_title = self._og_search_title(
            webpage, default=None) or self._html_search_regex(
            r'(?s)<title>(.*?)</title>', webpage, 'video title',
            default='video')
        info_dict['title'] = video_title

        return info_dict
|