1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2025-01-11 03:58:58 +00:00

[crooksandliars] Improve embed extractor and remove article extractor

This commit is contained in:
Sergey M․ 2015-04-11 20:03:12 +06:00
parent af14ded75e
commit 7a91d1fc43
2 changed files with 39 additions and 50 deletions

View File

@@ -90,7 +90,7 @@ from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .condenast import CondeNastIE from .condenast import CondeNastIE
from .cracked import CrackedIE from .cracked import CrackedIE
from .criterion import CriterionIE from .criterion import CriterionIE
from .crooksandliars import CrooksAndLiarsIE, CrooksAndLiarsArticleIE from .crooksandliars import CrooksAndLiarsIE
from .crunchyroll import ( from .crunchyroll import (
CrunchyrollIE, CrunchyrollIE,
CrunchyrollShowPlaylistIE CrunchyrollShowPlaylistIE

View File

@@ -1,71 +1,60 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
mimetype2ext, int_or_none,
qualities,
) )
class CrooksAndLiarsIE(InfoExtractor): class CrooksAndLiarsIE(InfoExtractor):
_VALID_URL = r'(?:https?:)?//embed.crooksandliars.com/embed/(?P<id>[A-Za-z0-9]+)(?:$|[?#])' _VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P<id>[A-Za-z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi', 'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi',
'info_dict': { 'info_dict': {
'id': 'https://embed.crooksandliars.com/embed/8RUoRhRi', 'id': '8RUoRhRi',
'ext': 'mp4',
'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!", 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
'description': "Fox News, Fox & Friends Weekend, April 4, 2015. Read more... http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists", 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
'thumbnail': 're:^https?://.*\.jpg',
'timestamp': 1428207000, 'timestamp': 1428207000,
'thumbnail': 'https://crooksandliars.com/files/mediaposters/2015/04/31235.jpg?ts=1428207050', 'upload_date': '20150405',
'uploader': "Heather", 'uploader': 'Heather',
'duration': 236,
} }
}] }, {
'url': 'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA',
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
manifest = json.loads(self._html_search_regex(r'var manifest = ({.*?})\n', webpage, 'manifest JSON'))
formats = []
for item in manifest['flavors']:
if not item['mime'].startswith('video/'): # XXX: or item['exclude']?
continue
formats.append({
'format_id': item['type'],
'ext': mimetype2ext(item['mime']),
'url': item['url'],
})
# XXX: manifest['url']?
return {
'url': url,
'id': video_id,
'uploader': manifest['author'],
'title': manifest['title'],
'description': manifest['description'],
'thumbnail': self._proto_relative_url(manifest['poster']),
'duration': manifest['duration'],
'timestamp': int(manifest['created']),
'formats': formats,
}
class CrooksAndLiarsArticleIE(InfoExtractor):
_VALID_URL = r'(?:https?:)?//crooksandliars.com/\d+/\d+/(?P<id>[a-z\-]+)(?:/|$)'
_TESTS = [{
'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
'only_matching': True, 'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
player_url = self._proto_relative_url(self._html_search_regex(r'<iframe src="(//embed.crooksandliars.com/.*)"', webpage, 'embedded player')) webpage = self._download_webpage(
'http://embed.crooksandliars.com/embed/%s' % video_id, video_id)
manifest = self._parse_json(
self._search_regex(
r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'),
video_id)
quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high'))
formats = [{
'url': item['url'],
'format_id': item['type'],
'quality': quality(item['type']),
} for item in manifest['flavors'] if item['mime'].startswith('video/')]
self._sort_formats(formats)
return { return {
'_type': 'url', 'url': url,
'url': player_url 'id': video_id,
'title': manifest['title'],
'description': manifest.get('description'),
'thumbnail': self._proto_relative_url(manifest.get('poster')),
'timestamp': int_or_none(manifest.get('created')),
'uploader': manifest.get('author'),
'duration': int_or_none(manifest.get('duration')),
'formats': formats,
} }