mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2026-06-15 00:50:13 +00:00
Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| c4cd138b92 | |||
| 65697b3bf3 | |||
| 50317b111d | |||
| d7975ea287 | |||
| 714d709a31 | |||
| 11577ec054 | |||
| 79bf58f9b5 | |||
| cd8a562267 | |||
| de3ef3ed58 | |||
| 8908741806 | |||
| ba7678f9cc | |||
| b7ab059084 | |||
| dd27fd1739 |
@@ -158,7 +158,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
## Video Format Options:
|
||||
-f, --format FORMAT video format code, specify the order of
|
||||
preference using slashes: "-f 22/17/18". "-f mp4"
|
||||
and "-f flv" are also supported
|
||||
and "-f flv" are also supported. You can also use
|
||||
the special names "best", "bestaudio", "worst",
|
||||
and "worstaudio"
|
||||
--all-formats download all available video formats
|
||||
--prefer-free-formats prefer free video formats unless a specific one
|
||||
is requested
|
||||
|
||||
+80
-48
@@ -1,5 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
@@ -30,125 +32,155 @@ class TestFormatSelection(unittest.TestCase):
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = True
|
||||
formats = [
|
||||
{u'ext': u'webm', u'height': 460},
|
||||
{u'ext': u'mp4', u'height': 460},
|
||||
{'ext': 'webm', 'height': 460},
|
||||
{'ext': 'mp4', 'height': 460},
|
||||
]
|
||||
info_dict = {u'formats': formats, u'extractor': u'test'}
|
||||
info_dict = {'formats': formats, 'extractor': 'test'}
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'ext'], u'webm')
|
||||
self.assertEqual(downloaded['ext'], 'webm')
|
||||
|
||||
# Different resolution => download best quality (mp4)
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = True
|
||||
formats = [
|
||||
{u'ext': u'webm', u'height': 720},
|
||||
{u'ext': u'mp4', u'height': 1080},
|
||||
{'ext': 'webm', 'height': 720},
|
||||
{'ext': 'mp4', 'height': 1080},
|
||||
]
|
||||
info_dict[u'formats'] = formats
|
||||
info_dict['formats'] = formats
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'ext'], u'mp4')
|
||||
self.assertEqual(downloaded['ext'], 'mp4')
|
||||
|
||||
# No prefer_free_formats => prefer mp4 and flv for greater compatibility
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = False
|
||||
formats = [
|
||||
{u'ext': u'webm', u'height': 720},
|
||||
{u'ext': u'mp4', u'height': 720},
|
||||
{u'ext': u'flv', u'height': 720},
|
||||
{'ext': 'webm', 'height': 720},
|
||||
{'ext': 'mp4', 'height': 720},
|
||||
{'ext': 'flv', 'height': 720},
|
||||
]
|
||||
info_dict[u'formats'] = formats
|
||||
info_dict['formats'] = formats
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'ext'], u'mp4')
|
||||
self.assertEqual(downloaded['ext'], 'mp4')
|
||||
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = False
|
||||
formats = [
|
||||
{u'ext': u'flv', u'height': 720},
|
||||
{u'ext': u'webm', u'height': 720},
|
||||
{'ext': 'flv', 'height': 720},
|
||||
{'ext': 'webm', 'height': 720},
|
||||
]
|
||||
info_dict[u'formats'] = formats
|
||||
info_dict['formats'] = formats
|
||||
yie = YoutubeIE(ydl)
|
||||
yie._sort_formats(info_dict['formats'])
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'ext'], u'flv')
|
||||
self.assertEqual(downloaded['ext'], 'flv')
|
||||
|
||||
def test_format_limit(self):
|
||||
formats = [
|
||||
{u'format_id': u'meh', u'url': u'http://example.com/meh', 'preference': 1},
|
||||
{u'format_id': u'good', u'url': u'http://example.com/good', 'preference': 2},
|
||||
{u'format_id': u'great', u'url': u'http://example.com/great', 'preference': 3},
|
||||
{u'format_id': u'excellent', u'url': u'http://example.com/exc', 'preference': 4},
|
||||
{'format_id': 'meh', 'url': 'http://example.com/meh', 'preference': 1},
|
||||
{'format_id': 'good', 'url': 'http://example.com/good', 'preference': 2},
|
||||
{'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3},
|
||||
{'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4},
|
||||
]
|
||||
info_dict = {
|
||||
u'formats': formats, u'extractor': u'test', 'id': 'testvid'}
|
||||
'formats': formats, 'extractor': 'test', 'id': 'testvid'}
|
||||
|
||||
ydl = YDL()
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'format_id'], u'excellent')
|
||||
self.assertEqual(downloaded['format_id'], 'excellent')
|
||||
|
||||
ydl = YDL({'format_limit': 'good'})
|
||||
assert ydl.params['format_limit'] == 'good'
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'format_id'], u'good')
|
||||
self.assertEqual(downloaded['format_id'], 'good')
|
||||
|
||||
ydl = YDL({'format_limit': 'great', 'format': 'all'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
self.assertEqual(ydl.downloaded_info_dicts[0][u'format_id'], u'meh')
|
||||
self.assertEqual(ydl.downloaded_info_dicts[1][u'format_id'], u'good')
|
||||
self.assertEqual(ydl.downloaded_info_dicts[2][u'format_id'], u'great')
|
||||
self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'meh')
|
||||
self.assertEqual(ydl.downloaded_info_dicts[1]['format_id'], 'good')
|
||||
self.assertEqual(ydl.downloaded_info_dicts[2]['format_id'], 'great')
|
||||
self.assertTrue('3' in ydl.msgs[0])
|
||||
|
||||
ydl = YDL()
|
||||
ydl.params['format_limit'] = 'excellent'
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded[u'format_id'], u'excellent')
|
||||
self.assertEqual(downloaded['format_id'], 'excellent')
|
||||
|
||||
def test_format_selection(self):
|
||||
formats = [
|
||||
{u'format_id': u'35', u'ext': u'mp4', 'preference': 1},
|
||||
{u'format_id': u'45', u'ext': u'webm', 'preference': 2},
|
||||
{u'format_id': u'47', u'ext': u'webm', 'preference': 3},
|
||||
{u'format_id': u'2', u'ext': u'flv', 'preference': 4},
|
||||
{'format_id': '35', 'ext': 'mp4', 'preference': 1},
|
||||
{'format_id': '45', 'ext': 'webm', 'preference': 2},
|
||||
{'format_id': '47', 'ext': 'webm', 'preference': 3},
|
||||
{'format_id': '2', 'ext': 'flv', 'preference': 4},
|
||||
]
|
||||
info_dict = {u'formats': formats, u'extractor': u'test'}
|
||||
info_dict = {'formats': formats, 'extractor': 'test'}
|
||||
|
||||
ydl = YDL({'format': u'20/47'})
|
||||
ydl = YDL({'format': '20/47'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], u'47')
|
||||
self.assertEqual(downloaded['format_id'], '47')
|
||||
|
||||
ydl = YDL({'format': u'20/71/worst'})
|
||||
ydl = YDL({'format': '20/71/worst'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], u'35')
|
||||
self.assertEqual(downloaded['format_id'], '35')
|
||||
|
||||
ydl = YDL()
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], u'2')
|
||||
self.assertEqual(downloaded['format_id'], '2')
|
||||
|
||||
ydl = YDL({'format': u'webm/mp4'})
|
||||
ydl = YDL({'format': 'webm/mp4'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], u'47')
|
||||
self.assertEqual(downloaded['format_id'], '47')
|
||||
|
||||
ydl = YDL({'format': u'3gp/40/mp4'})
|
||||
ydl = YDL({'format': '3gp/40/mp4'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], u'35')
|
||||
self.assertEqual(downloaded['format_id'], '35')
|
||||
|
||||
def test_format_selection_audio(self):
    """Check the 'bestaudio' / 'worstaudio' format names and their fallback.

    The first info dict mixes three formats marked ``vcodec: 'none'`` with
    one ordinary video format; the second contains video formats only, so
    both audio selectors must fall through to 'best'.
    """
    def chosen_format_id(format_spec, info):
        # Run a fresh downloader over a shallow copy of the info dict and
        # report which format it recorded first.
        downloader = YDL({'format': format_spec})
        downloader.process_ie_result(info.copy())
        return downloader.downloaded_info_dicts[0]['format_id']

    mixed_info = {
        'formats': [
            {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none'},
            {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none'},
            {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none'},
            {'format_id': 'vid', 'ext': 'mp4', 'preference': 4},
        ],
        'extractor': 'test',
    }
    self.assertEqual(chosen_format_id('bestaudio', mixed_info), 'audio-high')
    self.assertEqual(chosen_format_id('worstaudio', mixed_info), 'audio-low')

    video_only_info = {
        'formats': [
            {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1},
            {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2},
        ],
        'extractor': 'test',
    }
    self.assertEqual(
        chosen_format_id('bestaudio/worstaudio/best', video_only_info),
        'vid-high')
|
||||
|
||||
def test_youtube_format_selection(self):
|
||||
order = [
|
||||
@@ -200,17 +232,17 @@ class TestFormatSelection(unittest.TestCase):
|
||||
|
||||
def test_prepare_filename(self):
|
||||
info = {
|
||||
u'id': u'1234',
|
||||
u'ext': u'mp4',
|
||||
u'width': None,
|
||||
'id': '1234',
|
||||
'ext': 'mp4',
|
||||
'width': None,
|
||||
}
|
||||
def fname(templ):
|
||||
ydl = YoutubeDL({'outtmpl': templ})
|
||||
return ydl.prepare_filename(info)
|
||||
self.assertEqual(fname(u'%(id)s.%(ext)s'), u'1234.mp4')
|
||||
self.assertEqual(fname(u'%(id)s-%(width)s.%(ext)s'), u'1234-NA.mp4')
|
||||
self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
|
||||
self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
|
||||
# Replace missing fields with 'NA'
|
||||
self.assertEqual(fname(u'%(uploader_date)s-%(id)s.%(ext)s'), u'NA-1234.mp4')
|
||||
self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -19,6 +19,7 @@ from youtube_dl.utils import (
|
||||
fix_xml_ampersands,
|
||||
get_meta_content,
|
||||
orderedSet,
|
||||
PagedList,
|
||||
parse_duration,
|
||||
sanitize_filename,
|
||||
shell_quote,
|
||||
@@ -214,5 +215,26 @@ class TestUtil(unittest.TestCase):
|
||||
fix_xml_ampersands('Ӓ᪼'), 'Ӓ᪼')
|
||||
self.assertEqual(fix_xml_ampersands('&#&#'), '&#&#')
|
||||
|
||||
def test_paged_list(self):
    """Exercise PagedList.getslice over a simulated 5-item, 2-per-page source."""
    def make_pagefunc(total, per_page):
        # Page p holds the integers [p * per_page, (p + 1) * per_page),
        # clipped to the total number of items.
        def get_page(pagenum):
            lower = pagenum * per_page
            upper = min(total, lower + per_page)
            return (item for item in range(lower, upper))
        return get_page

    cases = [
        # (size, pagesize, getslice arguments, expected result)
        (5, 2, (), [0, 1, 2, 3, 4]),
        (5, 2, (1,), [1, 2, 3, 4]),
        (5, 2, (2,), [2, 3, 4]),
        (5, 2, (4,), [4]),
        (5, 2, (0, 3), [0, 1, 2]),
        (5, 2, (1, 4), [1, 2, 3]),
        (5, 2, (2, 99), [2, 3, 4]),
        (5, 2, (20, 99), []),
    ]
    for size, pagesize, sliceargs, expected in cases:
        paged = PagedList(make_pagefunc(size, pagesize), pagesize)
        self.assertEqual(paged.getslice(*sliceargs), expected)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
+31
-8
@@ -39,6 +39,7 @@ from .utils import (
|
||||
locked_file,
|
||||
make_HTTPS_handler,
|
||||
MaxDownloadsReached,
|
||||
PagedList,
|
||||
PostProcessingError,
|
||||
platform_name,
|
||||
preferredencoding,
|
||||
@@ -578,19 +579,27 @@ class YoutubeDL(object):
|
||||
|
||||
playlist_results = []
|
||||
|
||||
n_all_entries = len(ie_result['entries'])
|
||||
playliststart = self.params.get('playliststart', 1) - 1
|
||||
playlistend = self.params.get('playlistend', None)
|
||||
# For backwards compatibility, interpret -1 as whole list
|
||||
if playlistend == -1:
|
||||
playlistend = None
|
||||
|
||||
entries = ie_result['entries'][playliststart:playlistend]
|
||||
n_entries = len(entries)
|
||||
|
||||
self.to_screen(
|
||||
"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
|
||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||
if isinstance(ie_result['entries'], list):
|
||||
n_all_entries = len(ie_result['entries'])
|
||||
entries = ie_result['entries'][playliststart:playlistend]
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
|
||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||
else:
|
||||
assert isinstance(ie_result['entries'], PagedList)
|
||||
entries = ie_result['entries'].getslice(
|
||||
playliststart, playlistend)
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
"[%s] playlist %s: Downloading %d videos" %
|
||||
(ie_result['extractor'], playlist, n_entries))
|
||||
|
||||
for i, entry in enumerate(entries, 1):
|
||||
self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
|
||||
@@ -637,6 +646,18 @@ class YoutubeDL(object):
|
||||
return available_formats[-1]
|
||||
elif format_spec == 'worst':
|
||||
return available_formats[0]
|
||||
elif format_spec == 'bestaudio':
|
||||
audio_formats = [
|
||||
f for f in available_formats
|
||||
if f.get('vcodec') == 'none']
|
||||
if audio_formats:
|
||||
return audio_formats[-1]
|
||||
elif format_spec == 'worstaudio':
|
||||
audio_formats = [
|
||||
f for f in available_formats
|
||||
if f.get('vcodec') == 'none']
|
||||
if audio_formats:
|
||||
return audio_formats[0]
|
||||
else:
|
||||
extensions = ['mp4', 'flv', 'webm', '3gp']
|
||||
if format_spec in extensions:
|
||||
@@ -701,7 +722,7 @@ class YoutubeDL(object):
|
||||
self.list_formats(info_dict)
|
||||
return
|
||||
|
||||
req_format = self.params.get('format', 'best')
|
||||
req_format = self.params.get('format')
|
||||
if req_format is None:
|
||||
req_format = 'best'
|
||||
formats_to_download = []
|
||||
@@ -1094,6 +1115,8 @@ class YoutubeDL(object):
|
||||
res += 'audio'
|
||||
if fdict.get('abr') is not None:
|
||||
res += '@%3dk' % fdict['abr']
|
||||
if fdict.get('asr') is not None:
|
||||
res += ' (%5dHz)' % fdict['asr']
|
||||
if fdict.get('filesize') is not None:
|
||||
if res:
|
||||
res += ', '
|
||||
|
||||
@@ -256,8 +256,8 @@ def parseOpts(overrideArguments=None):
|
||||
|
||||
|
||||
video_format.add_option('-f', '--format',
|
||||
action='store', dest='format', metavar='FORMAT', default='best',
|
||||
help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
|
||||
action='store', dest='format', metavar='FORMAT', default=None,
|
||||
help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestaudio", "worst", and "worstaudio"')
|
||||
video_format.add_option('--all-formats',
|
||||
action='store_const', dest='format', help='download all available video formats', const='all')
|
||||
video_format.add_option('--prefer-free-formats',
|
||||
@@ -624,6 +624,10 @@ def _real_main(argv=None):
|
||||
if opts.default_search not in ('auto', None) and ':' not in opts.default_search:
|
||||
parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
|
||||
|
||||
# Do not download videos when there are audio-only formats
|
||||
if opts.extractaudio and not opts.keepvideo and opts.format is None:
|
||||
opts.format = 'bestaudio/best'
|
||||
|
||||
# --all-sub automatically sets --write-sub if --write-auto-sub is not given
|
||||
# this was the old behaviour if only --all-sub was given.
|
||||
if opts.allsubtitles and (opts.writeautomaticsub == False):
|
||||
|
||||
@@ -63,6 +63,7 @@ class InfoExtractor(object):
|
||||
* tbr Average bitrate of audio and video in KBit/s
|
||||
* abr Average audio bitrate in KBit/s
|
||||
* acodec Name of the audio codec in use
|
||||
* asr Audio sampling rate in Hertz
|
||||
* vbr Average video bitrate in KBit/s
|
||||
* vcodec Name of the video codec in use
|
||||
* filesize The number of bytes, if known in advance
|
||||
|
||||
@@ -20,6 +20,7 @@ class CSpanIE(InfoExtractor):
|
||||
'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
|
||||
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in [Shelby County v. Holder] in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
|
||||
},
|
||||
'skip': 'Regularly fails on travis, for unknown reasons',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -9,12 +11,12 @@ from ..utils import (
|
||||
class XVideosIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
|
||||
_TEST = {
|
||||
u'url': u'http://www.xvideos.com/video939581/funny_porns_by_s_-1',
|
||||
u'file': u'939581.flv',
|
||||
u'md5': u'1d0c835822f0a71a7bf011855db929d0',
|
||||
u'info_dict': {
|
||||
u"title": u"Funny Porns By >>>>S<<<<<< -1",
|
||||
u"age_limit": 18,
|
||||
'url': 'http://www.xvideos.com/video939581/funny_porns_by_s_-1',
|
||||
'file': '939581.flv',
|
||||
'md5': '1d0c835822f0a71a7bf011855db929d0',
|
||||
'info_dict': {
|
||||
"title": "Funny Porns By >>>>S<<<<<< -1",
|
||||
"age_limit": 18,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,18 +29,18 @@ class XVideosIE(InfoExtractor):
|
||||
self.report_extraction(video_id)
|
||||
|
||||
# Extract video URL
|
||||
video_url = compat_urllib_parse.unquote(self._search_regex(r'flv_url=(.+?)&',
|
||||
webpage, u'video URL'))
|
||||
video_url = compat_urllib_parse.unquote(
|
||||
self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL'))
|
||||
|
||||
# Extract title
|
||||
video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XVID',
|
||||
webpage, u'title')
|
||||
video_title = self._html_search_regex(
|
||||
r'<title>(.*?)\s+-\s+XVID', webpage, 'title')
|
||||
|
||||
# Extract video thumbnail
|
||||
video_thumbnail = self._search_regex(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/[a-fA-F0-9]+/([a-fA-F0-9.]+jpg)',
|
||||
webpage, u'thumbnail', fatal=False)
|
||||
video_thumbnail = self._search_regex(
|
||||
r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False)
|
||||
|
||||
info = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': None,
|
||||
@@ -49,5 +51,3 @@ class XVideosIE(InfoExtractor):
|
||||
'description': None,
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
||||
return [info]
|
||||
|
||||
@@ -27,6 +27,8 @@ from ..utils import (
|
||||
get_element_by_id,
|
||||
get_element_by_attribute,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
PagedList,
|
||||
RegexNotFoundError,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
@@ -270,6 +272,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
u"uploader_id": u"setindia"
|
||||
}
|
||||
},
|
||||
{
|
||||
u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I",
|
||||
u"file": u"a9LDPn-MO4I.m4a",
|
||||
u"note": u"256k DASH audio (format 141) via DASH manifest",
|
||||
u"params": {
|
||||
u"format": "141"
|
||||
},
|
||||
u"info_dict": {
|
||||
u"upload_date": "20121002",
|
||||
u"uploader_id": "8KVIDEO",
|
||||
u"description": "No description available.",
|
||||
u"uploader": "8KVIDEO",
|
||||
u"title": "UHDTV TEST 8K VIDEO.mp4"
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
@@ -1067,18 +1084,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
video_id = mobj.group(2)
|
||||
return video_id
|
||||
|
||||
def _get_video_url_list(self, url_map):
|
||||
"""
|
||||
Transform a dictionary in the format {itag:url} to a list of (itag, url)
|
||||
with the requested formats.
|
||||
"""
|
||||
existing_formats = [x for x in self._formats if x in url_map]
|
||||
if len(existing_formats) == 0:
|
||||
raise ExtractorError(u'no known formats available for video')
|
||||
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
|
||||
video_url_list.reverse() # order worst to best
|
||||
return video_url_list
|
||||
|
||||
def _extract_from_m3u8(self, manifest_url, video_id):
|
||||
url_map = {}
|
||||
def _get_urls(_manifest):
|
||||
@@ -1252,7 +1257,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
video_annotations = self._extract_annotations(video_id)
|
||||
|
||||
# Decide which formats to download
|
||||
|
||||
try:
|
||||
mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
|
||||
if not mobj:
|
||||
@@ -1277,9 +1281,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
def _map_to_format_list(urlmap):
    """Turn an {itag: url} mapping into a list of format dicts.

    Each dict carries the itag as 'format_id', the URL, the enclosing
    scope's player_url, and whatever self._formats records for that itag.
    Raises KeyError for an itag that is missing from self._formats.
    """
    def _build_entry(fmt_id, fmt_url):
        entry = {
            'format_id': fmt_id,
            'url': fmt_url,
            'player_url': player_url,
        }
        # Static per-itag metadata is applied last, so it wins on key clashes.
        entry.update(self._formats[fmt_id])
        return entry

    return [_build_entry(fmt_id, fmt_url) for fmt_id, fmt_url in urlmap.items()]
|
||||
|
||||
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
|
||||
self.report_rtmp_download()
|
||||
video_url_list = [('_rtmp', video_info['conn'][0])]
|
||||
formats = [{
|
||||
'format_id': '_rtmp',
|
||||
'protocol': 'rtmp',
|
||||
'url': video_info['conn'][0],
|
||||
'player_url': player_url,
|
||||
}]
|
||||
elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
|
||||
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0]
|
||||
if 'rtmpe%3Dyes' in encoded_url_map:
|
||||
@@ -1324,23 +1345,49 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
if 'ratebypass' not in url:
|
||||
url += '&ratebypass=yes'
|
||||
url_map[url_data['itag'][0]] = url
|
||||
video_url_list = self._get_video_url_list(url_map)
|
||||
formats = _map_to_format_list(url_map)
|
||||
elif video_info.get('hlsvp'):
|
||||
manifest_url = video_info['hlsvp'][0]
|
||||
url_map = self._extract_from_m3u8(manifest_url, video_id)
|
||||
video_url_list = self._get_video_url_list(url_map)
|
||||
formats = _map_to_format_list(url_map)
|
||||
else:
|
||||
raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
|
||||
|
||||
formats = []
|
||||
for itag, video_real_url in video_url_list:
|
||||
dct = {
|
||||
'format_id': itag,
|
||||
'url': video_real_url,
|
||||
'player_url': player_url,
|
||||
}
|
||||
dct.update(self._formats[itag])
|
||||
formats.append(dct)
|
||||
# Look for the DASH manifest
|
||||
dash_manifest_url_lst = video_info.get('dashmpd')
|
||||
if dash_manifest_url_lst and dash_manifest_url_lst[0]:
|
||||
try:
|
||||
dash_doc = self._download_xml(
|
||||
dash_manifest_url_lst[0], video_id,
|
||||
note=u'Downloading DASH manifest',
|
||||
errnote=u'Could not download DASH manifest')
|
||||
for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
|
||||
url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
|
||||
if url_el is None:
|
||||
continue
|
||||
format_id = r.attrib['id']
|
||||
video_url = url_el.text
|
||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
|
||||
f = {
|
||||
'format_id': format_id,
|
||||
'url': video_url,
|
||||
'width': int_or_none(r.attrib.get('width')),
|
||||
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
|
||||
'asr': int_or_none(r.attrib.get('audioSamplingRate')),
|
||||
'filesize': filesize,
|
||||
}
|
||||
try:
|
||||
existing_format = next(
|
||||
fo for fo in formats
|
||||
if fo['format_id'] == format_id)
|
||||
except StopIteration:
|
||||
f.update(self._formats.get(format_id, {}))
|
||||
formats.append(f)
|
||||
else:
|
||||
existing_format.update(f)
|
||||
|
||||
except (ExtractorError, KeyError) as e:
|
||||
self.report_warning(u'Skipping DASH manifest: %s' % e, video_id)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -1580,44 +1627,35 @@ class YoutubeUserIE(InfoExtractor):
|
||||
# page by page until there are no video ids - it means we got
|
||||
# all of them.
|
||||
|
||||
url_results = []
|
||||
|
||||
for pagenum in itertools.count(0):
|
||||
def download_page(pagenum):
    """Yield one 'url' result per video listed on GData page `pagenum`.

    `pagenum` is zero-based; it is converted to the feed's one-based start
    index. Yields nothing when the response has no 'entry' key. Raises
    ExtractorError when the response is not valid JSON.
    """
    start_index = pagenum * self._GDATA_PAGE_SIZE + 1

    gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
    page = self._download_webpage(
        gdata_url, username,
        u'Downloading video ids from %d to %d' % (
            start_index, start_index + self._GDATA_PAGE_SIZE))

    try:
        response = json.loads(page)
    except ValueError as err:
        raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
    if 'entry' not in response['feed']:
        # Number of videos is a multiple of self._GDATA_PAGE_SIZE, so this
        # page lies just past the end of the listing.
        return

    # Extract video identifiers
    entries = response['feed']['entry']
    for entry in entries:
        title = entry['title']['$t']
        video_id = entry['id']['$t'].split('/')[-1]
        yield {
            '_type': 'url',
            'url': video_id,
            'ie_key': 'Youtube',
            # The extracted id itself, not the literal string 'video_id'.
            'id': video_id,
            'title': title,
        }
|
||||
url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(url_results, playlist_title=username)
|
||||
|
||||
|
||||
+46
-2
@@ -6,6 +6,7 @@ import datetime
|
||||
import email.utils
|
||||
import errno
|
||||
import gzip
|
||||
import itertools
|
||||
import io
|
||||
import json
|
||||
import locale
|
||||
@@ -1131,8 +1132,8 @@ class HEADRequest(compat_urllib_request.Request):
|
||||
return "HEAD"
|
||||
|
||||
|
||||
def int_or_none(v):
|
||||
return v if v is None else int(v)
|
||||
def int_or_none(v, scale=1):
    """Return None for None, otherwise int(v) floor-divided by scale."""
    if v is None:
        return None
    return int(v) // scale
|
||||
|
||||
|
||||
def parse_duration(s):
|
||||
@@ -1164,3 +1165,46 @@ def check_executable(exe, args=[]):
|
||||
except OSError:
|
||||
return False
|
||||
return exe
|
||||
|
||||
|
||||
class PagedList(object):
    """List-like view over results that are fetched one page at a time.

    ``pagefunc`` is called with a zero-based page number and must return an
    iterable of that page's entries; ``pagesize`` is the number of entries
    on a full page.  A page holding fewer than ``pagesize`` entries is taken
    to be the last one.
    """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indices in [start, end) as a list.

        Only the pages overlapping the requested range are fetched.  With
        ``end=None`` pages are fetched until a short page is seen.  A
        negative ``end`` is not a from-the-end index; as before, it is
        treated as "no upper bound".
        """
        # An empty or inverted range selects nothing.  Without this guard a
        # page was still fetched and items at or after `start` leaked into
        # the result (e.g. getslice(4, 4) with pagesize 2 returned [4]).
        if end is not None and 0 <= end <= start:
            return []

        res = []
        first_page = start // self._pagesize
        for pagenum in itertools.count(first_page):
            firstid = pagenum * self._pagesize
            nextfirstid = firstid + self._pagesize

            page_results = list(self._pagefunc(pagenum))
            # Measured before slicing: fullness is a property of the page,
            # not of the part of it the caller asked for.
            page_is_full = len(page_results) >= self._pagesize

            # Only the first fetched page can start mid-page.
            startv = start - firstid if pagenum == first_page else 0
            ends_here = end is not None and firstid < end <= nextfirstid
            endv = end - firstid if ends_here else None
            res.extend(page_results[startv:endv])

            # Stop once the requested range is exhausted, or when a short
            # page shows there is nothing further to fetch.
            if ends_here or not page_is_full:
                break
        return res
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.01.22.2'
|
||||
__version__ = '2014.01.22.4'
|
||||
|
||||
Reference in New Issue
Block a user