Leverage JWPlatformBase instead of Generic

Per @yan12125's suggestion: * Redefine _find_jwplayer_data() to use the SetupJWPlayer RE that's IQM2-specific * Retreive the 2ndary webpage on our own * Search for the title just like generic does
2025-07-01 16:06:23 +00:00 · 2016-10-10 02:12:44 -04:00 · 2016-10-10 02:12:44 -04:00 · 87af84de37
commit 87af84de37
parent 00253f8312
1 changed files with 25 additions and 15 deletions
--- a/youtube_dl/extractor/iqm2.py
+++ b/youtube_dl/extractor/iqm2.py
@ -6,7 +6,7 @@ import re

 from .common import InfoExtractor
 from ..compat import compat_urlparse
-from .generic import GenericIE
+from .jwplatform import JWPlatformBaseIE

 # IQM2 aka Accela is a municipal meeting management platform that
 # (among other things) stores livestreamed video from municipal
@ -31,52 +31,62 @@ from .generic import GenericIE

 # Contributed by John Hawkinson <jhawk@mit.edu>, 6 Oct 2016.

-class IQM2IE(InfoExtractor):
+class IQM2IE(JWPlatformBaseIE):

    # We commonly see both iqm2.com and IQM2.com.
    _VALID_URL = r'(?i)https?://(?:\w+\.)?iqm2\.com/Citizens/\w+.aspx\?.*MeetingID=(?P<id>[0-9]+)'
    _TESTS = [
-        {
+        {  # This is a "realtime" case
            'url': 'http://somervillecityma.iqm2.com/Citizens/SplitView.aspx?Mode=Video&MeetingID=2308',
            'md5': '9ef458ff6c93f8b9323cf79db4ede9cf',
            'info_dict': {
-                'id': '70472_480',
+                'id': '2308',
                'ext': 'mp4',
                'title': 'City of Somerville, Massachusetts',
-                'uploader': 'somervillecityma.iqm2.com',
            }},
        {
+            # This is a "postprocessed" case    
            'url': 'http://cambridgema.iqm2.com/Citizens/SplitView.aspx?Mode=Video&MeetingID=1679#',
            'md5': '478ea30eee1966f7be0d8dd623122148',
            'info_dict': {
-                'id': '1563_720',
+                'id': '1679',
                'ext': 'mp4',
-                'title': 'Cambridge, MA (2)',
-                'uploader': 'cambridgema.iqm2.com',
+                'title': 'Cambridge, MA',
            }},
        {
            'url': 'https://CambridgeMA.IQM2.com/Citizens/VideoMain.aspx?MeetingID=1679',
            'only_matching': True,
        }]

+    def _find_jwplayer_data(self, webpage):
+        mobj = re.search(r'SetupJWPlayer\(eval\(\'(?P<options>[^)]+)\'\)', webpage)
+        if mobj:
+            return mobj.group('options')
+        
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

-        # Simple extractor: take, e.g.
+        # Take, e.g.
        #   http://cambridgema.iqm2.com/Citizens/SplitView.aspx?Mode=Video&MeetingID=1679
        # and look for
        #   <div id="VideoPanel" class="LeftTopContent">
        #     <div id="VideoPanelInner" ... src="/Citizens/VideoScreen.aspx?MediaID=1563&Frame=SplitView">
-        # and feed the canonicalized src element to the generic extractor
+        # and then parse the canonicalized src element
        inner_url_rel = self._html_search_regex(
            r'<div id="VideoPanelInner".*src="([^"]+)"',
            webpage, 'url');

        inner_url = compat_urlparse.urljoin(url, inner_url_rel)
+        webpage = self._download_webpage(inner_url, video_id)

-        # Generic extractor matches this under the "Broaden the
-        # findall a little bit: JWPlayer JS loader" (line 2372 as of 6
-        # Oct 2016, dcdb292fddc82ae11f4c0b647815a45c88a6b6d5).
-        return self.url_result(smuggle_url(inner_url, {'to_generic': True}),
-                               'Generic')
+        info_dict = self._extract_jwplayer_data(
+            webpage, video_id, require_title=False)
+
+        video_title = self._og_search_title(
+            webpage, default=None) or self._html_search_regex(
+            r'(?s)<title>(.*?)</title>', webpage, 'video title',
+            default='video')
+        info_dict['title'] = video_title
+        
+        return info_dict