From fd61f317bf71b91abbc5cd256027877b70f6dbce Mon Sep 17 00:00:00 2001
From: Tim Mann <tim@tim-mann.org>
Date: Sat, 13 Feb 2021 16:26:33 -0800
Subject: [PATCH] Step 1 of a rewrite to find and parse embedded JSON instead
 of just running a regexp over the whole page.  This version passes the tests,
 but more work is needed (e.g. removing the debug output marked #XXX).

---
 youtube_dl/extractor/pac12.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/pac12.py b/youtube_dl/extractor/pac12.py
index 534261e15..07da749d3 100644
--- a/youtube_dl/extractor/pac12.py
+++ b/youtube_dl/extractor/pac12.py
@@ -1,5 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
+from __future__ import print_function #XXX
+import pprint #XXX
 
 import re
 
@@ -32,9 +34,15 @@ class Pac12IE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        video_url = \
-            self._search_regex(r'"manifest_url":"(?P<url>https:[^"]+)"',
-                               webpage, 'url', group='url', default=None)
+
+        drupal_settings = self._parse_json(
+            self._search_regex(
+                r'<script[^>]+type="application/json"[^>]*data-drupal-selector="drupal-settings-json">([^<]+)</script>',
+                webpage, 'drupal settings'), video_id)
+        pprint.pprint(drupal_settings.get('currentVideo'))
+
+        video_url = drupal_settings.get('currentVideo', {}).get('manifest_url')
+
         vod_url = None
         if (video_url is None) or ('vod-' not in url):
             vod_url = self._search_regex(r'(https?://(?:embed\.)?pac-12\.com/(?:embed/)?vod-[0-9a-zA-Z]+)',