1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-14 13:27:27 +00:00
youtube-dl/youtube_dl/extractor/cbslocal.py

120 lines
4.0 KiB
Python
Raw Normal View History

# coding: utf-8
from __future__ import unicode_literals
from .anvato import AnvatoIE
from .sendtonews import SendtoNewsIE
from ..compat import compat_urlparse
from ..utils import (
parse_iso8601,
unified_timestamp,
)
class CBSLocalIE(AnvatoIE):
2020-12-24 11:40:34 +00:00
_VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
_VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'
_TESTS = [{
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
'info_dict': {
'id': '3580809',
'ext': 'mp4',
'title': 'A Very Blue Anniversary',
'description': 'CBS2s Cindy Hsu has more.',
'thumbnail': 're:^https?://.*',
'timestamp': int,
'upload_date': r're:^\d{8}$',
'uploader': 'CBS',
'subtitles': {
'en': 'mincount:5',
},
'categories': [
'Stations\\Spoken Word\\WCBSTV',
'Syndication\\AOL',
'Syndication\\MSN',
'Syndication\\NDN',
'Syndication\\Yahoo',
'Content\\News',
'Content\\News\\Local News',
],
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):
mcp_id = self._match_id(url)
return self.url_result(
'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id)
class CBSLocalArticleIE(AnvatoIE):
_VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
_TESTS = [{
# Anvato backend
'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis',
'md5': 'f0ee3081e3843f575fccef901199b212',
'info_dict': {
'id': '3401037',
'ext': 'mp4',
'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.',
'thumbnail': 're:^https?://.*',
'timestamp': 1463440500,
'upload_date': '20160516',
2016-11-04 14:33:08 +00:00
'uploader': 'CBS',
'subtitles': {
'en': 'mincount:5',
},
'categories': [
'Stations\\Spoken Word\\KCBSTV',
'Syndication\\MSN',
'Syndication\\NDN',
'Syndication\\AOL',
'Syndication\\Yahoo',
'Syndication\\Tribune',
'Syndication\\Curb.tv',
'Content\\News'
],
2016-11-04 14:33:08 +00:00
'tags': ['CBS 2 News Evening'],
},
}, {
# SendtoNews embed
'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
'info_dict': {
'id': 'GxfCe0Zo7D-175909-5588',
},
2016-08-15 05:37:37 +00:00
'playlist_count': 9,
'params': {
# m3u8 download
'skip_download': True,
},
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
sendtonews_url = SendtoNewsIE._extract_url(webpage)
if sendtonews_url:
2016-08-15 05:37:37 +00:00
return self.url_result(
compat_urlparse.urljoin(url, sendtonews_url),
ie=SendtoNewsIE.ie_key())
info_dict = self._extract_anvato_videos(webpage, display_id)
timestamp = unified_timestamp(self._html_search_regex(
r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage,
'released date', default=None)) or parse_iso8601(
self._html_search_meta('uploadDate', webpage))
info_dict.update({
'display_id': display_id,
'timestamp': timestamp,
})
return info_dict