diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index feee06004..6be43a613 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -392,6 +392,7 @@ from .instagram import InstagramIE, InstagramUserIE from .internetvideoarchive import InternetVideoArchiveIE from .iprima import IPrimaIE from .iqiyi import IqiyiIE +from .iqm2 import IQM2IE from .ir90tv import Ir90TvIE from .ivi import ( IviIE, diff --git a/youtube_dl/extractor/iqm2.py b/youtube_dl/extractor/iqm2.py new file mode 100644 index 000000000..06df0df0c --- /dev/null +++ b/youtube_dl/extractor/iqm2.py @@ -0,0 +1,81 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlparse +from .generic import GenericIE + +# IQM2 aka Accela is a municipal meeting management platform that +# (among other things) stores livestreamed video from municipal +# meetings. After a hefty (several-hour) processing time, that video +# is available in easily downloadable form from their web portal, but +# prior to that, the video can only be watched in realtime through +# JWPlayer. This extractor is designed to download the realtime video +# prior to download links being available. See: +# http://www.iqm2.com/About/Accela.aspx +# http://www.accela.com/ + +# This makes it challenging to produce a test case for, because the +# extractor will want to follow the processed and easily downloadable +# version. So there may be interesting bugs during the race condition +# time before the processed video is available (which is really the +# only time this extractor is especially important). + +# This is also a relatively braindead extractor. 
It parses a given page like +# http://cambridgema.iqm2.com/Citizens/SplitView.aspx?Mode=Video&MeetingID=1679# +# to determine the location of an inner div defined by a URL of the form +# http://cambridgema.iqm2.com/Citizens/VideoScreen.aspx?MediaID=1563&Frame=SplitView + +# and then simply hands that URL to the generic extractor, which +# matches it under the "Broaden the findall a little bit: JWPlayer JS +# loader" (line 2372 as of 6 Oct 2016). + +# This also appears to be the only example to date of an extractor +# that calls-out to the GenericIE generic extractor, so it may be +# useful as an example. Or perhaps it means that there's a better way +# to do this and it should be rewritten differently, esp. to not +# leverage the generic. + +# Contributed by John Hawkinson , 6 Oct 2016. + + +# https://CambridgeMA.IQM2.com/Citizens/VideoMain.aspx?MeetingID=1679 + +class IQM2IE(InfoExtractor): + _VALID_URL = r'https?://(?:\w+\.)?iqm2\.com/Citizens/SplitView.aspx\?Mode=Video&MeetingID=(?P[0-9]+)' + _TEST = { + 'url': 'http://cambridgema.iqm2.com/Citizens/SplitView.aspx?Mode=Video&MeetingID=1679#', + 'md5': '478ea30eee1966f7be0d8dd623122148', + 'info_dict': { + 'id': '1563_720', + 'ext': 'mp4', + 'title': 'Cambridge, MA (2)', + 'uploader': 'cambridgema.iqm2.com', + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + # title = self._html_search_regex(r'

(.*?)

', webpage, 'title') + title = 'Video title goes here' + + purl = compat_urllib_parse_urlparse(url) + hostname = purl.hostname + print "URL is", url, "at", hostname + nurl = self._html_search_regex(r'