1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2025-02-06 18:20:02 +00:00

[IQM2] Add new extractor

first cut
This commit is contained in:
John Hawkinson 2016-10-06 01:09:53 -04:00
parent 017eb82934
commit 70d117053d
2 changed files with 82 additions and 0 deletions

View File

@ -392,6 +392,7 @@ from .instagram import InstagramIE, InstagramUserIE
from .internetvideoarchive import InternetVideoArchiveIE from .internetvideoarchive import InternetVideoArchiveIE
from .iprima import IPrimaIE from .iprima import IPrimaIE
from .iqiyi import IqiyiIE from .iqiyi import IqiyiIE
from .iqm2 import IQM2IE
from .ir90tv import Ir90TvIE from .ir90tv import Ir90TvIE
from .ivi import ( from .ivi import (
IviIE, IviIE,

View File

@ -0,0 +1,81 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_urllib_parse_urlparse,
    compat_urlparse,
)
from .generic import GenericIE
# IQM2 aka Accela is a municipal meeting management platform that
# (among other things) stores livestreamed video from municipal
# meetings. After a hefty (several-hour) processing time, that video
# is available in easily downloadable form from their web portal, but
# prior to that, the video can only be watched in realtime through
# JWPlayer. This extractor is designed to download the realtime video
# prior to download links being available. See:
# http://www.iqm2.com/About/Accela.aspx
# http://www.accela.com/
# This makes it challenging to produce a test case for, because the
# extractor will want to follow the processed and easily downloadable
# version. So there may be interesting bugs during the race condition
# time before the processed video is available (which is really the
# only time this extractor is especially important).
# This is also a relatively braindead extractor. It parses a given page like
# http://cambridgema.iqm2.com/Citizens/SplitView.aspx?Mode=Video&MeetingID=1679#
# to determine the location of an inner div defined by a URL of the form
# http://cambridgema.iqm2.com/Citizens/VideoScreen.aspx?MediaID=1563&Frame=SplitView
# and then simply hands that URL to the generic extractor, which
# matches it under the "Broaden the findall a little bit: JWPlayer JS
# loader" (line 2372 as of 6 Oct 2016).
# This also appears to be the only example to date of an extractor
# that calls-out to the GenericIE generic extractor, so it may be
# useful as an example. Or perhaps it means that there's a better way
# to do this and it should be rewritten differently, esp. to not
# leverage the generic.
# Contributed by John Hawkinson <jhawk@mit.edu>, 6 Oct 2016.
# https://CambridgeMA.IQM2.com/Citizens/VideoMain.aspx?MeetingID=1679
class IQM2IE(InfoExtractor):
    """Extractor for IQM2 (Accela) municipal meeting video pages.

    Given a ``SplitView.aspx?Mode=Video&MeetingID=...`` page, this locates
    the ``VideoPanelInner`` div, reads the player frame URL from its ``src``
    attribute, resolves it against the page URL and hands the result to the
    generic extractor (which is expected to pick up the JWPlayer setup on
    that frame page).
    """

    # The dot in "SplitView.aspx" is escaped so it only matches a literal '.'.
    _VALID_URL = r'https?://(?:\w+\.)?iqm2\.com/Citizens/SplitView\.aspx\?Mode=Video&MeetingID=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://cambridgema.iqm2.com/Citizens/SplitView.aspx?Mode=Video&MeetingID=1679#',
        'md5': '478ea30eee1966f7be0d8dd623122148',
        'info_dict': {
            'id': '1563_720',
            'ext': 'mp4',
            'title': 'Cambridge, MA (2)',
            'uploader': 'cambridgema.iqm2.com',
            # TODO: add more properties once the extractor yields them.
        }
    }

    def _real_extract(self, url):
        """Resolve the meeting page to its player frame and delegate.

        :param url: meeting page URL matching ``_VALID_URL``.
        :returns: a URL result pointing the generic extractor at the
            player frame page.
        :raises: whatever ``_download_webpage`` / ``_html_search_regex``
            raise when the page cannot be fetched or the player div is
            missing.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Restrict the match to the opening <div ...> tag itself: a greedy
        # '.*' could run past the tag and capture a later src="..." that
        # happens to sit on the same line.
        inner_url = self._html_search_regex(
            r'<div id="VideoPanelInner"[^>]*\bsrc="([^"]+)"',
            webpage, 'video frame URL')

        # urljoin copes with root-relative, page-relative and absolute
        # values alike, unlike plain scheme + netloc concatenation.
        frame_url = compat_urlparse.urljoin(url, inner_url)

        # Delegate through the normal result pipeline instead of invoking
        # another extractor's _real_extract directly.
        return self.url_result(frame_url, ie='Generic')