1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2026-06-15 00:50:13 +00:00

Compare commits

..

13 Commits

Author SHA1 Message Date
Philipp Hagemeister c4cd138b92 release 2014.01.22.4 2014-01-22 21:01:52 +01:00
Philipp Hagemeister 65697b3bf3 Merge branch 'paged-lists'
Conflicts:
	test/test_utils.py
	youtube_dl/extractor/youtube.py
2014-01-22 20:00:16 +01:00
Philipp Hagemeister 50317b111d Merge branch 'youtube-dash-manifest'
Conflicts:
	youtube_dl/extractor/youtube.py
2014-01-22 19:58:31 +01:00
Philipp Hagemeister d7975ea287 [xvideos] Simplify 2014-01-22 19:02:48 +01:00
Mike Col 714d709a31 [xvideos] Fix thumbnail extraction
Signed-off-by: Philipp Hagemeister <phihag@phihag.de>
2014-01-22 19:01:41 +01:00
Philipp Hagemeister 11577ec054 [cspan] Disable test
It works fine from all my machines, no matter where, but from travis, we get lots of 403s.
Maybe another project is scraping CSPAN from travis and they're blocking the travis machines?
2014-01-22 15:10:02 +01:00
Philipp Hagemeister 79bf58f9b5 Document -f worstaudio as well 2014-01-22 14:55:45 +01:00
Philipp Hagemeister cd8a562267 release 2014.01.22.3 2014-01-22 14:53:36 +01:00
Philipp Hagemeister de3ef3ed58 Default to -f best-audio when only audio is requested 2014-01-22 14:53:23 +01:00
Philipp Hagemeister 8908741806 Use unicode_literals in test_YoutubeDL 2014-01-22 14:48:02 +01:00
Philipp Hagemeister ba7678f9cc Add -f bestaudio (Fixes #2163) 2014-01-22 14:47:29 +01:00
Philipp Hagemeister b7ab059084 Add infrastructure for paged lists
This commit allows to download pages in playlists as needed instead of all at once.
Before this commit,
    youtube-dl http://www.youtube.com/user/ANNnewsCH/videos --playlist-end 2 --skip-download
took quite some time - now it's almost instantaneous.
As an example, the youtube:user extractor has been converted.
Fixes #2175
2014-01-20 11:36:47 +01:00
Philipp Hagemeister dd27fd1739 [youtube] Download DASH manifest
If given, download and parse the DASH manifest file, in order to get ultra-HQ formats.
Fixes #2166
2014-01-19 05:47:20 +01:00
11 changed files with 287 additions and 120 deletions
+3 -1
View File
@@ -158,7 +158,9 @@ which means you can modify it, redistribute it or use it however you like.
## Video Format Options:
-f, --format FORMAT video format code, specify the order of
preference using slashes: "-f 22/17/18". "-f mp4"
and "-f flv" are also supported
and "-f flv" are also supported. You can also use
the special names "best", "bestaudio", "worst",
and "worstaudio"
--all-formats download all available video formats
--prefer-free-formats prefer free video formats unless a specific one
is requested
+80 -48
View File
@@ -1,5 +1,7 @@
#!/usr/bin/env python
from __future__ import unicode_literals
# Allow direct execution
import os
import sys
@@ -30,125 +32,155 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL()
ydl.params['prefer_free_formats'] = True
formats = [
{u'ext': u'webm', u'height': 460},
{u'ext': u'mp4', u'height': 460},
{'ext': 'webm', 'height': 460},
{'ext': 'mp4', 'height': 460},
]
info_dict = {u'formats': formats, u'extractor': u'test'}
info_dict = {'formats': formats, 'extractor': 'test'}
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'ext'], u'webm')
self.assertEqual(downloaded['ext'], 'webm')
# Different resolution => download best quality (mp4)
ydl = YDL()
ydl.params['prefer_free_formats'] = True
formats = [
{u'ext': u'webm', u'height': 720},
{u'ext': u'mp4', u'height': 1080},
{'ext': 'webm', 'height': 720},
{'ext': 'mp4', 'height': 1080},
]
info_dict[u'formats'] = formats
info_dict['formats'] = formats
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'ext'], u'mp4')
self.assertEqual(downloaded['ext'], 'mp4')
# No prefer_free_formats => prefer mp4 and flv for greater compatibilty
ydl = YDL()
ydl.params['prefer_free_formats'] = False
formats = [
{u'ext': u'webm', u'height': 720},
{u'ext': u'mp4', u'height': 720},
{u'ext': u'flv', u'height': 720},
{'ext': 'webm', 'height': 720},
{'ext': 'mp4', 'height': 720},
{'ext': 'flv', 'height': 720},
]
info_dict[u'formats'] = formats
info_dict['formats'] = formats
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'ext'], u'mp4')
self.assertEqual(downloaded['ext'], 'mp4')
ydl = YDL()
ydl.params['prefer_free_formats'] = False
formats = [
{u'ext': u'flv', u'height': 720},
{u'ext': u'webm', u'height': 720},
{'ext': 'flv', 'height': 720},
{'ext': 'webm', 'height': 720},
]
info_dict[u'formats'] = formats
info_dict['formats'] = formats
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'ext'], u'flv')
self.assertEqual(downloaded['ext'], 'flv')
def test_format_limit(self):
formats = [
{u'format_id': u'meh', u'url': u'http://example.com/meh', 'preference': 1},
{u'format_id': u'good', u'url': u'http://example.com/good', 'preference': 2},
{u'format_id': u'great', u'url': u'http://example.com/great', 'preference': 3},
{u'format_id': u'excellent', u'url': u'http://example.com/exc', 'preference': 4},
{'format_id': 'meh', 'url': 'http://example.com/meh', 'preference': 1},
{'format_id': 'good', 'url': 'http://example.com/good', 'preference': 2},
{'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3},
{'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4},
]
info_dict = {
u'formats': formats, u'extractor': u'test', 'id': 'testvid'}
'formats': formats, 'extractor': 'test', 'id': 'testvid'}
ydl = YDL()
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'format_id'], u'excellent')
self.assertEqual(downloaded['format_id'], 'excellent')
ydl = YDL({'format_limit': 'good'})
assert ydl.params['format_limit'] == 'good'
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'format_id'], u'good')
self.assertEqual(downloaded['format_id'], 'good')
ydl = YDL({'format_limit': 'great', 'format': 'all'})
ydl.process_ie_result(info_dict.copy())
self.assertEqual(ydl.downloaded_info_dicts[0][u'format_id'], u'meh')
self.assertEqual(ydl.downloaded_info_dicts[1][u'format_id'], u'good')
self.assertEqual(ydl.downloaded_info_dicts[2][u'format_id'], u'great')
self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'meh')
self.assertEqual(ydl.downloaded_info_dicts[1]['format_id'], 'good')
self.assertEqual(ydl.downloaded_info_dicts[2]['format_id'], 'great')
self.assertTrue('3' in ydl.msgs[0])
ydl = YDL()
ydl.params['format_limit'] = 'excellent'
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'format_id'], u'excellent')
self.assertEqual(downloaded['format_id'], 'excellent')
def test_format_selection(self):
formats = [
{u'format_id': u'35', u'ext': u'mp4', 'preference': 1},
{u'format_id': u'45', u'ext': u'webm', 'preference': 2},
{u'format_id': u'47', u'ext': u'webm', 'preference': 3},
{u'format_id': u'2', u'ext': u'flv', 'preference': 4},
{'format_id': '35', 'ext': 'mp4', 'preference': 1},
{'format_id': '45', 'ext': 'webm', 'preference': 2},
{'format_id': '47', 'ext': 'webm', 'preference': 3},
{'format_id': '2', 'ext': 'flv', 'preference': 4},
]
info_dict = {u'formats': formats, u'extractor': u'test'}
info_dict = {'formats': formats, 'extractor': 'test'}
ydl = YDL({'format': u'20/47'})
ydl = YDL({'format': '20/47'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], u'47')
self.assertEqual(downloaded['format_id'], '47')
ydl = YDL({'format': u'20/71/worst'})
ydl = YDL({'format': '20/71/worst'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], u'35')
self.assertEqual(downloaded['format_id'], '35')
ydl = YDL()
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], u'2')
self.assertEqual(downloaded['format_id'], '2')
ydl = YDL({'format': u'webm/mp4'})
ydl = YDL({'format': 'webm/mp4'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], u'47')
self.assertEqual(downloaded['format_id'], '47')
ydl = YDL({'format': u'3gp/40/mp4'})
ydl = YDL({'format': '3gp/40/mp4'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], u'35')
self.assertEqual(downloaded['format_id'], '35')
def test_format_selection_audio(self):
formats = [
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none'},
{'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none'},
{'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none'},
{'format_id': 'vid', 'ext': 'mp4', 'preference': 4},
]
info_dict = {'formats': formats, 'extractor': 'test'}
ydl = YDL({'format': 'bestaudio'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'audio-high')
ydl = YDL({'format': 'worstaudio'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'audio-low')
formats = [
{'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1},
{'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2},
]
info_dict = {'formats': formats, 'extractor': 'test'}
ydl = YDL({'format': 'bestaudio/worstaudio/best'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'vid-high')
def test_youtube_format_selection(self):
order = [
@@ -200,17 +232,17 @@ class TestFormatSelection(unittest.TestCase):
def test_prepare_filename(self):
info = {
u'id': u'1234',
u'ext': u'mp4',
u'width': None,
'id': '1234',
'ext': 'mp4',
'width': None,
}
def fname(templ):
ydl = YoutubeDL({'outtmpl': templ})
return ydl.prepare_filename(info)
self.assertEqual(fname(u'%(id)s.%(ext)s'), u'1234.mp4')
self.assertEqual(fname(u'%(id)s-%(width)s.%(ext)s'), u'1234-NA.mp4')
self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
# Replace missing fields with 'NA'
self.assertEqual(fname(u'%(uploader_date)s-%(id)s.%(ext)s'), u'NA-1234.mp4')
self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
if __name__ == '__main__':
+22
View File
@@ -19,6 +19,7 @@ from youtube_dl.utils import (
fix_xml_ampersands,
get_meta_content,
orderedSet,
PagedList,
parse_duration,
sanitize_filename,
shell_quote,
@@ -214,5 +215,26 @@ class TestUtil(unittest.TestCase):
fix_xml_ampersands('&#1234;&#x1abC;'), '&#1234;&#x1abC;')
self.assertEqual(fix_xml_ampersands('&#&#'), '&amp;#&amp;#')
def test_paged_list(self):
def testPL(size, pagesize, sliceargs, expected):
def get_page(pagenum):
firstid = pagenum * pagesize
upto = min(size, pagenum * pagesize + pagesize)
for i in range(firstid, upto):
yield i
pl = PagedList(get_page, pagesize)
got = pl.getslice(*sliceargs)
self.assertEqual(got, expected)
testPL(5, 2, (), [0, 1, 2, 3, 4])
testPL(5, 2, (1,), [1, 2, 3, 4])
testPL(5, 2, (2,), [2, 3, 4])
testPL(5, 2, (4,), [4])
testPL(5, 2, (0, 3), [0, 1, 2])
testPL(5, 2, (1, 4), [1, 2, 3])
testPL(5, 2, (2, 99), [2, 3, 4])
testPL(5, 2, (20, 99), [])
if __name__ == '__main__':
unittest.main()
+31 -8
View File
@@ -39,6 +39,7 @@ from .utils import (
locked_file,
make_HTTPS_handler,
MaxDownloadsReached,
PagedList,
PostProcessingError,
platform_name,
preferredencoding,
@@ -578,19 +579,27 @@ class YoutubeDL(object):
playlist_results = []
n_all_entries = len(ie_result['entries'])
playliststart = self.params.get('playliststart', 1) - 1
playlistend = self.params.get('playlistend', None)
# For backwards compatibility, interpret -1 as whole list
if playlistend == -1:
playlistend = None
entries = ie_result['entries'][playliststart:playlistend]
n_entries = len(entries)
self.to_screen(
"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
(ie_result['extractor'], playlist, n_all_entries, n_entries))
if isinstance(ie_result['entries'], list):
n_all_entries = len(ie_result['entries'])
entries = ie_result['entries'][playliststart:playlistend]
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
(ie_result['extractor'], playlist, n_all_entries, n_entries))
else:
assert isinstance(ie_result['entries'], PagedList)
entries = ie_result['entries'].getslice(
playliststart, playlistend)
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Downloading %d videos" %
(ie_result['extractor'], playlist, n_entries))
for i, entry in enumerate(entries, 1):
self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
@@ -637,6 +646,18 @@ class YoutubeDL(object):
return available_formats[-1]
elif format_spec == 'worst':
return available_formats[0]
elif format_spec == 'bestaudio':
audio_formats = [
f for f in available_formats
if f.get('vcodec') == 'none']
if audio_formats:
return audio_formats[-1]
elif format_spec == 'worstaudio':
audio_formats = [
f for f in available_formats
if f.get('vcodec') == 'none']
if audio_formats:
return audio_formats[0]
else:
extensions = ['mp4', 'flv', 'webm', '3gp']
if format_spec in extensions:
@@ -701,7 +722,7 @@ class YoutubeDL(object):
self.list_formats(info_dict)
return
req_format = self.params.get('format', 'best')
req_format = self.params.get('format')
if req_format is None:
req_format = 'best'
formats_to_download = []
@@ -1094,6 +1115,8 @@ class YoutubeDL(object):
res += 'audio'
if fdict.get('abr') is not None:
res += '@%3dk' % fdict['abr']
if fdict.get('asr') is not None:
res += ' (%5dHz)' % fdict['asr']
if fdict.get('filesize') is not None:
if res:
res += ', '
+6 -2
View File
@@ -256,8 +256,8 @@ def parseOpts(overrideArguments=None):
video_format.add_option('-f', '--format',
action='store', dest='format', metavar='FORMAT', default='best',
help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
action='store', dest='format', metavar='FORMAT', default=None,
help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestaudio", "worst", and "worstaudio"')
video_format.add_option('--all-formats',
action='store_const', dest='format', help='download all available video formats', const='all')
video_format.add_option('--prefer-free-formats',
@@ -624,6 +624,10 @@ def _real_main(argv=None):
if opts.default_search not in ('auto', None) and ':' not in opts.default_search:
parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
# Do not download videos when there are audio-only formats
if opts.extractaudio and not opts.keepvideo and opts.format is None:
opts.format = 'bestaudio/best'
# --all-sub automatically sets --write-sub if --write-auto-sub is not given
# this was the old behaviour if only --all-sub was given.
if opts.allsubtitles and (opts.writeautomaticsub == False):
+1
View File
@@ -63,6 +63,7 @@ class InfoExtractor(object):
* tbr Average bitrate of audio and video in KBit/s
* abr Average audio bitrate in KBit/s
* acodec Name of the audio codec in use
* asr Audio sampling rate in Hertz
* vbr Average video bitrate in KBit/s
* vcodec Name of the video codec in use
* filesize The number of bytes, if known in advance
+1
View File
@@ -20,6 +20,7 @@ class CSpanIE(InfoExtractor):
'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in [Shelby County v. Holder] in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
},
'skip': 'Regularly fails on travis, for unknown reasons',
}
def _real_extract(self, url):
+15 -15
View File
@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@@ -9,12 +11,12 @@ from ..utils import (
class XVideosIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
_TEST = {
u'url': u'http://www.xvideos.com/video939581/funny_porns_by_s_-1',
u'file': u'939581.flv',
u'md5': u'1d0c835822f0a71a7bf011855db929d0',
u'info_dict': {
u"title": u"Funny Porns By >>>>S<<<<<< -1",
u"age_limit": 18,
'url': 'http://www.xvideos.com/video939581/funny_porns_by_s_-1',
'file': '939581.flv',
'md5': '1d0c835822f0a71a7bf011855db929d0',
'info_dict': {
"title": "Funny Porns By >>>>S<<<<<< -1",
"age_limit": 18,
}
}
@@ -27,18 +29,18 @@ class XVideosIE(InfoExtractor):
self.report_extraction(video_id)
# Extract video URL
video_url = compat_urllib_parse.unquote(self._search_regex(r'flv_url=(.+?)&',
webpage, u'video URL'))
video_url = compat_urllib_parse.unquote(
self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL'))
# Extract title
video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XVID',
webpage, u'title')
video_title = self._html_search_regex(
r'<title>(.*?)\s+-\s+XVID', webpage, 'title')
# Extract video thumbnail
video_thumbnail = self._search_regex(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/([a-fA-F0-9.]+jpg)',
webpage, u'thumbnail', fatal=False)
video_thumbnail = self._search_regex(
r'url_bigthumb=(.+?)&amp', webpage, 'thumbnail', fatal=False)
info = {
return {
'id': video_id,
'url': video_url,
'uploader': None,
@@ -49,5 +51,3 @@ class XVideosIE(InfoExtractor):
'description': None,
'age_limit': 18,
}
return [info]
+81 -43
View File
@@ -27,6 +27,8 @@ from ..utils import (
get_element_by_id,
get_element_by_attribute,
ExtractorError,
int_or_none,
PagedList,
RegexNotFoundError,
unescapeHTML,
unified_strdate,
@@ -270,6 +272,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u"uploader_id": u"setindia"
}
},
{
u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I",
u"file": u"a9LDPn-MO4I.m4a",
u"note": u"256k DASH audio (format 141) via DASH manifest",
u"params": {
u"format": "141"
},
u"info_dict": {
u"upload_date": "20121002",
u"uploader_id": "8KVIDEO",
u"description": "No description available.",
u"uploader": "8KVIDEO",
u"title": "UHDTV TEST 8K VIDEO.mp4"
}
},
]
@@ -1067,18 +1084,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_id = mobj.group(2)
return video_id
def _get_video_url_list(self, url_map):
"""
Transform a dictionary in the format {itag:url} to a list of (itag, url)
with the requested formats.
"""
existing_formats = [x for x in self._formats if x in url_map]
if len(existing_formats) == 0:
raise ExtractorError(u'no known formats available for video')
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
video_url_list.reverse() # order worst to best
return video_url_list
def _extract_from_m3u8(self, manifest_url, video_id):
url_map = {}
def _get_urls(_manifest):
@@ -1252,7 +1257,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_annotations = self._extract_annotations(video_id)
# Decide which formats to download
try:
mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
if not mobj:
@@ -1277,9 +1281,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
except ValueError:
pass
def _map_to_format_list(urlmap):
formats = []
for itag, video_real_url in urlmap.items():
dct = {
'format_id': itag,
'url': video_real_url,
'player_url': player_url,
}
dct.update(self._formats[itag])
formats.append(dct)
return formats
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
self.report_rtmp_download()
video_url_list = [('_rtmp', video_info['conn'][0])]
formats = [{
'format_id': '_rtmp',
'protocol': 'rtmp',
'url': video_info['conn'][0],
'player_url': player_url,
}]
elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
if 'rtmpe%3Dyes' in encoded_url_map:
@@ -1324,23 +1345,49 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if 'ratebypass' not in url:
url += '&ratebypass=yes'
url_map[url_data['itag'][0]] = url
video_url_list = self._get_video_url_list(url_map)
formats = _map_to_format_list(url_map)
elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0]
url_map = self._extract_from_m3u8(manifest_url, video_id)
video_url_list = self._get_video_url_list(url_map)
formats = _map_to_format_list(url_map)
else:
raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
formats = []
for itag, video_real_url in video_url_list:
dct = {
'format_id': itag,
'url': video_real_url,
'player_url': player_url,
}
dct.update(self._formats[itag])
formats.append(dct)
# Look for the DASH manifest
dash_manifest_url_lst = video_info.get('dashmpd')
if dash_manifest_url_lst and dash_manifest_url_lst[0]:
try:
dash_doc = self._download_xml(
dash_manifest_url_lst[0], video_id,
note=u'Downloading DASH manifest',
errnote=u'Could not download DASH manifest')
for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
if url_el is None:
continue
format_id = r.attrib['id']
video_url = url_el.text
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
f = {
'format_id': format_id,
'url': video_url,
'width': int_or_none(r.attrib.get('width')),
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
'asr': int_or_none(r.attrib.get('audioSamplingRate')),
'filesize': filesize,
}
try:
existing_format = next(
fo for fo in formats
if fo['format_id'] == format_id)
except StopIteration:
f.update(self._formats.get(format_id, {}))
formats.append(f)
else:
existing_format.update(f)
except (ExtractorError, KeyError) as e:
self.report_warning(u'Skipping DASH manifest: %s' % e, video_id)
self._sort_formats(formats)
@@ -1580,44 +1627,35 @@ class YoutubeUserIE(InfoExtractor):
# page by page until there are no video ids - it means we got
# all of them.
url_results = []
for pagenum in itertools.count(0):
def download_page(pagenum):
start_index = pagenum * self._GDATA_PAGE_SIZE + 1
gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
page = self._download_webpage(gdata_url, username,
u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
page = self._download_webpage(
gdata_url, username,
u'Downloading video ids from %d to %d' % (
start_index, start_index + self._GDATA_PAGE_SIZE))
try:
response = json.loads(page)
except ValueError as err:
raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
if 'entry' not in response['feed']:
# Number of videos is a multiple of self._MAX_RESULTS
break
return
# Extract video identifiers
entries = response['feed']['entry']
for entry in entries:
title = entry['title']['$t']
video_id = entry['id']['$t'].split('/')[-1]
url_results.append({
yield {
'_type': 'url',
'url': video_id,
'ie_key': 'Youtube',
'id': 'video_id',
'title': title,
})
# A little optimization - if current page is not
# "full", ie. does not contain PAGE_SIZE video ids then
# we can assume that this page is the last one - there
# are no more ids on further pages - no need to query
# again.
if len(entries) < self._GDATA_PAGE_SIZE:
break
}
url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)
return self.playlist_result(url_results, playlist_title=username)
+46 -2
View File
@@ -6,6 +6,7 @@ import datetime
import email.utils
import errno
import gzip
import itertools
import io
import json
import locale
@@ -1131,8 +1132,8 @@ class HEADRequest(compat_urllib_request.Request):
return "HEAD"
def int_or_none(v):
return v if v is None else int(v)
def int_or_none(v, scale=1):
return v if v is None else (int(v) // scale)
def parse_duration(s):
@@ -1164,3 +1165,46 @@ def check_executable(exe, args=[]):
except OSError:
return False
return exe
class PagedList(object):
def __init__(self, pagefunc, pagesize):
self._pagefunc = pagefunc
self._pagesize = pagesize
def getslice(self, start=0, end=None):
res = []
for pagenum in itertools.count(start // self._pagesize):
firstid = pagenum * self._pagesize
nextfirstid = pagenum * self._pagesize + self._pagesize
if start >= nextfirstid:
continue
page_results = list(self._pagefunc(pagenum))
startv = (
start % self._pagesize
if firstid <= start < nextfirstid
else 0)
endv = (
((end - 1) % self._pagesize) + 1
if (end is not None and firstid <= end <= nextfirstid)
else None)
if startv != 0 or endv is not None:
page_results = page_results[startv:endv]
res.extend(page_results)
# A little optimization - if current page is not "full", ie. does
# not contain page_size videos then we can assume that this page
# is the last one - there are no more ids on further pages -
# i.e. no need to query again.
if len(page_results) + startv < self._pagesize:
break
# If we got the whole page, but the next page is not interesting,
# break out early as well
if end == nextfirstid:
break
return res
+1 -1
View File
@@ -1,2 +1,2 @@
__version__ = '2014.01.22.2'
__version__ = '2014.01.22.4'