mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-01-05 09:16:18 +00:00
Merge branch 'master' into build-yml
This commit is contained in:
commit
0292587bf3
1
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
1
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
blank_issues_enabled: false
|
@ -150,7 +150,7 @@ After you have ensured this site is distributing its content legally, you can fo
|
|||||||
# TODO more properties (see youtube_dl/extractor/common.py)
|
# TODO more properties (see youtube_dl/extractor/common.py)
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). This makes the extractor available for use, as long as the class ends with `IE`.
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||||
|
@ -1069,9 +1069,11 @@ After you have ensured this site is distributing its content legally, you can fo
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test (actually, test case) then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note:
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
* the test names use the extractor class name **without the trailing `IE`**
|
||||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
* tests with `only_matching` key in test's dict are not counted.
|
||||||
|
8. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||||
|
9. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||||
|
|
||||||
$ flake8 youtube_dl/extractor/yourextractor.py
|
$ flake8 youtube_dl/extractor/yourextractor.py
|
||||||
|
|
||||||
|
@ -997,6 +997,25 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
self.assertEqual(downloaded['extractor'], 'Video')
|
self.assertEqual(downloaded['extractor'], 'Video')
|
||||||
self.assertEqual(downloaded['extractor_key'], 'Video')
|
self.assertEqual(downloaded['extractor_key'], 'Video')
|
||||||
|
|
||||||
|
def test_default_times(self):
    """Check that upload_date/release_date are derived from the timestamps.

    The info dict carries only 'timestamp' and 'release_timestamp'; after
    processing, the matching '*_date' fields must be present as text
    strings (compat_str) in YYYYMMDD form.
    """
    ydl = FakeYDL({'simulate': True, })
    out_info = ydl.process_ie_result({
        'id': '1234',
        'url': TEST_URL,
        'title': 'Title',
        'ext': 'mp4',
        'timestamp': 1631352900,
        'release_timestamp': 1632995931,
    })

    # Same pair of checks for each derived date field, in the original order
    expectations = (
        ('upload_date', '20210911'),
        ('release_date', '20210930'),
    )
    for date_key, expected in expectations:
        self.assertTrue(isinstance(out_info[date_key], compat_str))
        self.assertEqual(out_info[date_key], expected)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -1529,7 +1529,7 @@ class YoutubeDL(object):
|
|||||||
# see http://bugs.python.org/issue1646728)
|
# see http://bugs.python.org/issue1646728)
|
||||||
try:
|
try:
|
||||||
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
|
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
|
||||||
info_dict[date_key] = upload_date.strftime('%Y%m%d')
|
info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d'))
|
||||||
except (ValueError, OverflowError, OSError):
|
except (ValueError, OverflowError, OSError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -1906,8 +1906,17 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
if not self.params.get('skip_download', False):
|
if not self.params.get('skip_download', False):
|
||||||
try:
|
try:
|
||||||
|
def checked_get_suitable_downloader(info_dict, params):
    """Select a downloader and warn if the external downloader args were dropped.

    get_suitable_downloader() may reset params['external_downloader_args']
    when the requested external downloader is rejected; comparing the value
    before and after the call detects that case.

    Returns whatever get_suitable_downloader() returned.
    """
    args_before = params.get('external_downloader_args')
    downloader = get_suitable_downloader(info_dict, params)
    if not args_before or params.get('external_downloader_args'):
        # nothing was requested, or the args are still in place
        return downloader
    # external_downloader_args was cleared because external_downloader was rejected
    self.report_warning(
        'Requested external downloader cannot be used: '
        'ignoring --external-downloader-args.')
    return downloader
|
||||||
|
|
||||||
def dl(name, info):
|
def dl(name, info):
|
||||||
fd = get_suitable_downloader(info, self.params)(self, self.params)
|
fd = checked_get_suitable_downloader(info, self.params)(self, self.params)
|
||||||
for ph in self._progress_hooks:
|
for ph in self._progress_hooks:
|
||||||
fd.add_progress_hook(ph)
|
fd.add_progress_hook(ph)
|
||||||
if self.params.get('verbose'):
|
if self.params.get('verbose'):
|
||||||
|
@ -50,6 +50,9 @@ def _get_suitable_downloader(info_dict, params={}):
|
|||||||
ed = get_external_downloader(external_downloader)
|
ed = get_external_downloader(external_downloader)
|
||||||
if ed.can_download(info_dict):
|
if ed.can_download(info_dict):
|
||||||
return ed
|
return ed
|
||||||
|
# Avoid using unwanted args since external_downloader was rejected
|
||||||
|
if params.get('external_downloader_args'):
|
||||||
|
params['external_downloader_args'] = None
|
||||||
|
|
||||||
protocol = info_dict['protocol']
|
protocol = info_dict['protocol']
|
||||||
if protocol.startswith('m3u8') and info_dict.get('is_live'):
|
if protocol.startswith('m3u8') and info_dict.get('is_live'):
|
||||||
|
@ -18,7 +18,7 @@ class AliExpressLiveIE(InfoExtractor):
|
|||||||
'id': '2800002704436634',
|
'id': '2800002704436634',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'CASIMA7.22',
|
'title': 'CASIMA7.22',
|
||||||
'thumbnail': r're:http://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
'uploader': 'CASIMA Official Store',
|
'uploader': 'CASIMA Official Store',
|
||||||
'timestamp': 1500717600,
|
'timestamp': 1500717600,
|
||||||
'upload_date': '20170722',
|
'upload_date': '20170722',
|
||||||
|
89
youtube_dl/extractor/alsace20tv.py
Normal file
89
youtube_dl/extractor/alsace20tv.py
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
dict_get,
|
||||||
|
get_element_by_class,
|
||||||
|
int_or_none,
|
||||||
|
unified_strdate,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Alsace20TVIE(InfoExtractor):
    """Extractor for video pages on alsace20.tv."""

    _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/(?:[\w-]+/)+[\w-]+-(?P<id>[\w]+)'
    _TESTS = [{
        'url': 'https://www.alsace20.tv/VOD/Actu/JT/Votre-JT-jeudi-3-fevrier-lyNHCXpYJh.html',
        # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
        'info_dict': {
            'id': 'lyNHCXpYJh',
            'ext': 'mp4',
            'description': 'md5:fc0bc4a0692d3d2dba4524053de4c7b7',
            'title': 'Votre JT du jeudi 3 février',
            'upload_date': '20220203',
            'thumbnail': r're:https?://.+\.jpg',
            'duration': 1073,
            'view_count': int,
        },
        'params': {
            'format': 'bestvideo',
        },
    }]

    def _extract_video(self, video_id, url=None):
        """Build the info dict for video_id.

        video_id: key passed to the site's player JSON endpoint.
        url: optional page URL; when given, the page is downloaded
            (non-fatally) to supply description, duration and a fallback
            thumbnail. Without it those fields are None unless the player
            JSON provides them.

        Returns an info dict with 'id', 'title', 'formats' and the optional
        metadata fields. A missing 'titre' in the player JSON raises KeyError.
        """
        info = self._download_json(
            'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
            video_id) or {}
        title = info['titre']

        formats = []
        for res, fmt_url in (info.get('files') or {}).items():
            # URLs containing '/smil:_' are handled as SMIL manifests,
            # everything else as a DASH manifest
            if '/smil:_' in fmt_url:
                fmts = self._extract_smil_formats(fmt_url, video_id, fatal=False)
            else:
                fmts = self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False)
            formats.extend(fmts)
        self._sort_formats(formats)

        webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
        thumbnail = url_or_none(
            dict_get(info, ('image', 'preview', ))
            or self._og_search_thumbnail(webpage, default=None))
        # The date is taken from a six-digit '/YYMMDD_' component of the
        # thumbnail URL; the thumbnail may be missing, so never pass None
        # to the regex search (that would raise TypeError)
        upload_date = self._search_regex(
            r'/(\d{6})_', thumbnail or '', 'upload_date', default=None)
        if upload_date:
            upload_date = unified_strdate(
                '20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:]))
        else:
            upload_date = None

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': clean_html(get_element_by_class('wysiwyg', webpage)),
            'upload_date': upload_date,
            'thumbnail': thumbnail,
            # duration is optional: a page without the og tag must not
            # abort the extraction
            'duration': int_or_none(
                self._og_search_property('video:duration', webpage, default=None)),
            'view_count': int_or_none(info.get('nb_vues')),
        }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        return self._extract_video(video_id, url)
|
||||||
|
|
||||||
|
|
||||||
|
class Alsace20TVEmbedIE(Alsace20TVIE):
    """Extractor for alsace20.tv embed URLs (/emb/<id>).

    Reuses the parent's _extract_video() without a page URL, so only the
    data returned by the player JSON endpoint is used.
    """

    _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/emb/(?P<id>[\w]+)'
    _TESTS = [{
        'url': 'https://www.alsace20.tv/emb/lyNHCXpYJh',
        # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
        'info_dict': {
            'id': 'lyNHCXpYJh',
            'ext': 'mp4',
            'title': 'Votre JT du jeudi 3 février',
            'upload_date': '20220203',
            'thumbnail': r're:https?://.+\.jpg',
            'view_count': int,
        },
        'params': {
            'format': 'bestvideo',
        },
    }]

    def _real_extract(self, url):
        # no page URL is passed on: the embed page is not downloaded
        return self._extract_video(self._match_id(url))
|
59
youtube_dl/extractor/bigo.py
Normal file
59
youtube_dl/extractor/bigo.py
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError, urlencode_postdata
|
||||||
|
|
||||||
|
|
||||||
|
class BigoIE(InfoExtractor):
    """Extractor for live streams on bigo.tv."""

    _VALID_URL = r'https?://(?:www\.)?bigo\.tv/(?:[a-z]{2,}/)?(?P<id>[^/]+)'

    _TESTS = [{
        'url': 'https://www.bigo.tv/ja/221338632',
        'info_dict': {
            'id': '6576287577575737440',
            'title': '土よ〜💁♂️ 休憩室/REST room',
            'thumbnail': r're:https?://.+',
            'uploader': '✨Shin💫',
            'uploader_id': '221338632',
            'is_live': True,
        },
        'skip': 'livestream',
    }, {
        'url': 'https://www.bigo.tv/th/Tarlerm1304',
        'only_matching': True,
    }, {
        'url': 'https://bigo.tv/115976881',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Query the studio info endpoint and return the live stream info dict.

        Raises ExtractorError when the response is not a JSON object, when
        the API reports a non-zero 'code', when the user is not live, or
        when no HLS URL is supplied.
        """
        user_id = self._match_id(url)

        info_raw = self._download_json(
            'https://bigo.tv/studio/getInternalStudioInfo',
            user_id, data=urlencode_postdata({'siteId': user_id}))

        if not isinstance(info_raw, dict):
            raise ExtractorError('Received invalid JSON data')
        if info_raw.get('code'):
            raise ExtractorError(
                'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True)
        info = info_raw.get('data') or {}

        if not info.get('alive'):
            raise ExtractorError('This user is offline.', expected=True)

        hls_url = info.get('hls_src')
        if not hls_url:
            # a format without a URL would only fail later with an
            # unrelated-looking error; report the actual problem here
            raise ExtractorError('Unable to find the stream URL')

        return {
            'id': info.get('roomId') or user_id,
            'title': info.get('roomTopic') or info.get('nick_name') or user_id,
            'formats': [{
                'url': hls_url,
                'ext': 'mp4',
                'protocol': 'm3u8',
            }],
            'thumbnail': info.get('snapshot'),
            'uploader': info.get('nick_name'),
            'uploader_id': user_id,
            'is_live': True,
        }
|
@ -369,6 +369,11 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
|
|||||||
'filesize': int_or_none(play_data.get('size')),
|
'filesize': int_or_none(play_data.get('size')),
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
for a_format in formats:
|
||||||
|
a_format.setdefault('http_headers', {}).update({
|
||||||
|
'Referer': url,
|
||||||
|
})
|
||||||
|
|
||||||
song = self._call_api('song/info', au_id)
|
song = self._call_api('song/info', au_id)
|
||||||
title = song['title']
|
title = song['title']
|
||||||
statistic = song.get('statistic') or {}
|
statistic = song.get('statistic') or {}
|
||||||
|
148
youtube_dl/extractor/cpac.py
Normal file
148
youtube_dl/extractor/cpac.py
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
update_url_query,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
# compat_range: where an xrange callable exists, use it as range
try:
    range = xrange if callable(xrange) else range
except (NameError, TypeError):
    # no usable xrange: keep the built-in range
    pass
|
||||||
|
|
||||||
|
|
||||||
|
class CPACIE(InfoExtractor):
    """Extractor for single episodes on cpac.ca."""

    IE_NAME = 'cpac'
    _VALID_URL = r'https?://(?:www\.)?cpac\.ca/(?P<fr>l-)?episode\?id=(?P<id>[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12})'
    _TEST = {
        # 'url': 'http://www.cpac.ca/en/programs/primetime-politics/episodes/65490909',
        'url': 'https://www.cpac.ca/episode?id=fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
        'md5': 'e46ad699caafd7aa6024279f2614e8fa',
        'info_dict': {
            'id': 'fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
            'ext': 'mp4',
            'upload_date': '20220215',
            'title': 'News Conference to Celebrate National Kindness Week – February 15, 2022',
            'description': 'md5:466a206abd21f3a6f776cdef290c23fb',
            'timestamp': 1644901200,
        },
        'params': {
            'format': 'bestvideo',
            'hls_prefer_native': True,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for one episode.

        The interface language ('fr' for /l-episode URLs, otherwise 'en')
        selects the localised metadata keys and the preferred audio language.
        An episode without a video URL is reported through _sort_formats()
        rather than failing on an unbound name; a missing title key raises
        KeyError.
        """
        video_id = self._match_id(url)
        url_lang = 'fr' if '/l-episode?' in url else 'en'

        content = self._download_json(
            'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/episode/index.xml&crafterSite=cpacca&id=' + video_id,
            video_id)
        # All fields used below live under page/details; resolve it once,
        # regardless of whether a video URL is present
        details = try_get(content, lambda x: x['page']['details'], dict) or {}
        video_url = try_get(details, lambda x: x['videoUrl'], compat_str)

        formats = []
        if video_url:
            formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', ext='mp4')
            for fmt in formats:
                # prefer language to match URL
                fmt_lang = fmt.get('language')
                if fmt_lang == url_lang:
                    fmt['language_preference'] = 10
                elif not fmt_lang:
                    fmt['language_preference'] = -1
                else:
                    fmt['language_preference'] = -10
        # also reached with no formats, so that case gets a proper error
        self._sort_formats(formats)

        # title is mandatory: a missing key is an extraction failure
        title = str_or_none(details['title_%s_t' % (url_lang, )])
        # category is optional metadata
        category = str_or_none(details.get('category_%s_t' % (url_lang, )))
        v_type = details.get('type')

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': str_or_none(details.get('description_%s_t' % (url_lang, ))),
            'timestamp': unified_timestamp(details.get('liveDateTime')),
            # 'categories' (a list) is the documented info dict field
            'categories': [category] if category else None,
            'thumbnail': urljoin(url, str_or_none(details.get('image_%s_s' % (url_lang, )))),
            # unknown when the API gives no type
            'is_live': (v_type == 'live') if v_type is not None else None,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class CPACPlaylistIE(InfoExtractor):
    """Extractor for program pages and search results on cpac.ca."""

    IE_NAME = 'cpac:playlist'
    _VALID_URL = r'(?i)https?://(?:www\.)?cpac\.ca/(?:program|search|(?P<fr>emission|rechercher))\?(?:[^&]+&)*?(?P<id>(?:id=\d+|programId=\d+|key=[^&]+))'

    _TESTS = [{
        'url': 'https://www.cpac.ca/program?id=6',
        'info_dict': {
            'id': 'id=6',
            'title': 'Headline Politics',
            'description': 'Watch CPAC’s signature long-form coverage of the day’s pressing political events as they unfold.',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.cpac.ca/search?key=hudson&type=all&order=desc',
        'info_dict': {
            'id': 'key=hudson',
            'title': 'hudson',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://www.cpac.ca/search?programId=50',
        'info_dict': {
            'id': 'programId=50',
            'title': '50',
        },
        'playlist_count': 9,
    }, {
        'url': 'https://www.cpac.ca/emission?id=6',
        'only_matching': True,
    }, {
        'url': 'https://www.cpac.ca/rechercher?key=hudson&type=all&order=desc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Collect the episode URLs of a program or search listing.

        The playlist id is the matched query fragment ('id=...',
        'programId=...' or 'key=...'), which is passed to the API as-is.
        Continuation pages are downloaded non-fatally, so a failed page only
        loses its own entries.
        """
        video_id = self._match_id(url)
        url_lang = 'fr' if any(x in url for x in ('/emission?', '/rechercher?')) else 'en'
        pl_type, list_type = ('program', 'itemList') if any(x in url for x in ('/program?', '/emission?')) else ('search', 'searchResult')
        api_url = (
            'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/%s/index.xml&crafterSite=cpacca&%s'
            % (pl_type, video_id, ))
        first_page = self._download_json(api_url, video_id)
        total_pages = int_or_none(try_get(first_page, lambda x: x['page'][list_type]['totalPages']), default=1)

        entries = []
        content = first_page
        for page in range(1, total_pages + 1):
            if page > 1:
                api_url = update_url_query(api_url, {'page': '%d' % (page, ), })
                content = self._download_json(
                    api_url, video_id,
                    note='Downloading continuation - %d' % (page, ),
                    fatal=False)

            for item in try_get(content, lambda x: x['page'][list_type]['item'], list) or []:
                episode_url = urljoin(url, try_get(item, lambda x: x['url_%s_s' % (url_lang, )]))
                if episode_url:
                    entries.append(self.url_result(episode_url))

        # Playlist metadata is read from the first page: the last
        # continuation download may have failed
        return self.playlist_result(
            entries,
            playlist_id=video_id,
            playlist_title=try_get(first_page, lambda x: x['page']['program']['title_%s_t' % (url_lang, )]) or video_id.split('=')[-1],
            playlist_description=try_get(first_page, lambda x: x['page']['program']['description_%s_t' % (url_lang, )]),
        )
|
@ -51,6 +51,10 @@ from .anvato import AnvatoIE
|
|||||||
from .aol import AolIE
|
from .aol import AolIE
|
||||||
from .allocine import AllocineIE
|
from .allocine import AllocineIE
|
||||||
from .aliexpress import AliExpressLiveIE
|
from .aliexpress import AliExpressLiveIE
|
||||||
|
from .alsace20tv import (
|
||||||
|
Alsace20TVIE,
|
||||||
|
Alsace20TVEmbedIE,
|
||||||
|
)
|
||||||
from .apa import APAIE
|
from .apa import APAIE
|
||||||
from .aparat import AparatIE
|
from .aparat import AparatIE
|
||||||
from .appleconnect import AppleConnectIE
|
from .appleconnect import AppleConnectIE
|
||||||
@ -115,6 +119,7 @@ from .bfmtv import (
|
|||||||
)
|
)
|
||||||
from .bibeltv import BibelTVIE
|
from .bibeltv import BibelTVIE
|
||||||
from .bigflix import BigflixIE
|
from .bigflix import BigflixIE
|
||||||
|
from .bigo import BigoIE
|
||||||
from .bild import BildIE
|
from .bild import BildIE
|
||||||
from .bilibili import (
|
from .bilibili import (
|
||||||
BiliBiliIE,
|
BiliBiliIE,
|
||||||
@ -254,6 +259,10 @@ from .commonprotocols import (
|
|||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
from .contv import CONtvIE
|
from .contv import CONtvIE
|
||||||
from .corus import CorusIE
|
from .corus import CorusIE
|
||||||
|
from .cpac import (
|
||||||
|
CPACIE,
|
||||||
|
CPACPlaylistIE,
|
||||||
|
)
|
||||||
from .cracked import CrackedIE
|
from .cracked import CrackedIE
|
||||||
from .crackle import CrackleIE
|
from .crackle import CrackleIE
|
||||||
from .crooksandliars import CrooksAndLiarsIE
|
from .crooksandliars import CrooksAndLiarsIE
|
||||||
@ -470,6 +479,7 @@ from .hotstar import (
|
|||||||
)
|
)
|
||||||
from .howcast import HowcastIE
|
from .howcast import HowcastIE
|
||||||
from .howstuffworks import HowStuffWorksIE
|
from .howstuffworks import HowStuffWorksIE
|
||||||
|
from .hrfernsehen import HRFernsehenIE
|
||||||
from .hrti import (
|
from .hrti import (
|
||||||
HRTiIE,
|
HRTiIE,
|
||||||
HRTiPlaylistIE,
|
HRTiPlaylistIE,
|
||||||
@ -548,6 +558,7 @@ from .kinja import KinjaEmbedIE
|
|||||||
from .kinopoisk import KinoPoiskIE
|
from .kinopoisk import KinoPoiskIE
|
||||||
from .konserthusetplay import KonserthusetPlayIE
|
from .konserthusetplay import KonserthusetPlayIE
|
||||||
from .krasview import KrasViewIE
|
from .krasview import KrasViewIE
|
||||||
|
from .kth import KTHIE
|
||||||
from .ku6 import Ku6IE
|
from .ku6 import Ku6IE
|
||||||
from .kusi import KUSIIE
|
from .kusi import KUSIIE
|
||||||
from .kuwo import (
|
from .kuwo import (
|
||||||
|
101
youtube_dl/extractor/hrfernsehen.py
Normal file
101
youtube_dl/extractor/hrfernsehen.py
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
unescapeHTML
|
||||||
|
)
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class HRFernsehenIE(InfoExtractor):
    """Extractor for videos on hr-fernsehen.de and hessenschau.de."""

    IE_NAME = 'hrfernsehen'
    _VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'

    _TESTS = [{
        'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
        'md5': '5c4e0ba94677c516a2f65a84110fc536',
        'info_dict': {
            'id': '130546',
            'ext': 'mp4',
            'description': 'Sturmtief Kirsten fegt über Hessen / Die Corona-Pandemie – eine Chronologie / '
                           'Sterbehilfe: Die Lage in Hessen / Miss Hessen leitet zwei eigene Unternehmen / '
                           'Pop-Up Museum zeigt Schwarze Unterhaltung und Black Music',
            'subtitles': {'de': [{
                'url': 'https://hr-a.akamaihd.net/video/as/hessenschau/2020_08/hrLogo_200826200407_L385592_512x288-25p-500kbit.vtt'
            }]},
            'timestamp': 1598470200,
            'upload_date': '20200826',
            'thumbnail': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9__medium.jpg',
            'title': 'hessenschau vom 26.08.2020'
        }
    }, {
        'url': 'https://www.hr-fernsehen.de/sendungen-a-z/mex/sendungen/fair-und-gut---was-hinter-aldis-eigenem-guetesiegel-steckt,video-130544.html',
        'only_matching': True
    }]

    _GEO_COUNTRIES = ['DE']

    def extract_airdate(self, loader_data):
        """Return the air date from loader_data as a timestamp, or None.

        The value is read from mediaMetadata -> agf -> airdate; any level
        may be absent or null.
        """
        media_metadata = loader_data.get('mediaMetadata') or {}
        airdate_str = (media_metadata.get('agf') or {}).get('airdate')

        if airdate_str is None:
            return None

        return unified_timestamp(airdate_str)

    def extract_formats(self, loader_data):
        """Build the sorted formats list from 'videoResolutionLevels'.

        Width, height, fps and bitrate are refined from the
        '<W>x<H>-<fps>p-<kbit>kbit' part of the stream URL when present.
        """
        stream_formats = []
        for stream_obj in loader_data['videoResolutionLevels']:
            stream_format = {
                'format_id': '%sp' % (stream_obj['verticalResolution'], ),
                'height': stream_obj['verticalResolution'],
                'url': stream_obj['url'],
            }

            quality_information = re.search(
                r'([0-9]{3,4})x([0-9]{3,4})-([0-9]{2})p-([0-9]{3,4})kbit',
                stream_obj['url'])
            if quality_information:
                width, height, fps, tbr = quality_information.groups()
                stream_format['width'] = int_or_none(width)
                stream_format['height'] = int_or_none(height)
                stream_format['fps'] = int_or_none(fps)
                stream_format['tbr'] = int_or_none(tbr)

            stream_formats.append(stream_format)

        self._sort_formats(stream_formats)
        return stream_formats

    def _real_extract(self, url):
        """Extract the info dict from the player JSON embedded in the page."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_meta(
            ['og:title', 'twitter:title', 'name'], webpage)
        description = self._html_search_meta(
            ['description'], webpage)

        # The player configuration is HTML-escaped JSON in a data attribute
        loader_str = unescapeHTML(self._search_regex(r"data-new-hr-mediaplayer-loader='([^']*)'", webpage, "ardloader"))
        loader_data = json.loads(loader_str)

        info = {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': self.extract_formats(loader_data),
            'timestamp': self.extract_airdate(loader_data)
        }

        # a null subtitle must not produce an entry without a URL
        subtitle_url = loader_data.get('subtitle')
        if subtitle_url:
            info['subtitles'] = {'de': [{'url': subtitle_url}]}

        # De-duplicate the preview image URLs keeping first-seen order
        # (deterministic, unlike a set) and dropping empty values
        thumbnails = []
        for thumbnail_url in (loader_data.get('previewImageUrl') or {}).values():
            if thumbnail_url and thumbnail_url not in thumbnails:
                thumbnails.append(thumbnail_url)
        if thumbnails:
            info['thumbnails'] = [{'url': t} for t in thumbnails]

        return info
|
@ -373,5 +373,5 @@ class KalturaIE(InfoExtractor):
|
|||||||
'duration': info.get('duration'),
|
'duration': info.get('duration'),
|
||||||
'timestamp': info.get('createdAt'),
|
'timestamp': info.get('createdAt'),
|
||||||
'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
|
'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
|
||||||
'view_count': info.get('plays'),
|
'view_count': int_or_none(info.get('plays')),
|
||||||
}
|
}
|
||||||
|
31
youtube_dl/extractor/kth.py
Normal file
31
youtube_dl/extractor/kth.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import smuggle_url
|
||||||
|
|
||||||
|
|
||||||
|
class KTHIE(InfoExtractor):
    """Extractor for play.kth.se; hands the media id over to the Kaltura extractor."""

    _VALID_URL = r'https?://play\.kth\.se/(?:[^/]+/)+(?P<id>[a-z0-9_]+)'
    _TEST = {
        'url': 'https://play.kth.se/media/Lunch+breakA+De+nya+aff%C3%A4rerna+inom+Fordonsdalen/0_uoop6oz9',
        'md5': 'd83ada6d00ca98b73243a88efe19e8a6',
        'info_dict': {
            'id': '0_uoop6oz9',
            'ext': 'mp4',
            'title': 'md5:bd1d6931facb6828762a33e6ce865f37',
            'thumbnail': 're:https?://.+/thumbnail/.+',
            'duration': 3516,
            'timestamp': 1647345358,
            'upload_date': '20220315',
            'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f',
        }
    }

    def _real_extract(self, url):
        # Build a kaltura: URL for partner 308 and smuggle in the service
        # endpoint that the Kaltura extractor should query
        kaltura_url = smuggle_url(
            'kaltura:308:%s' % self._match_id(url),
            {'service_url': 'https://api.kaltura.nordu.net'})
        return self.url_result(kaltura_url, 'Kaltura')
|
@ -35,7 +35,9 @@ class MySpassIE(InfoExtractor):
|
|||||||
title = xpath_text(metadata, 'title', fatal=True)
|
title = xpath_text(metadata, 'title', fatal=True)
|
||||||
video_url = xpath_text(metadata, 'url_flv', 'download url', True)
|
video_url = xpath_text(metadata, 'url_flv', 'download url', True)
|
||||||
video_id_int = int(video_id)
|
video_id_int = int(video_id)
|
||||||
for group in re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url).groups():
|
|
||||||
|
grps = re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url)
|
||||||
|
for group in grps.groups() if grps else []:
|
||||||
group_int = int(group)
|
group_int = int(group)
|
||||||
if group_int > video_id_int:
|
if group_int > video_id_int:
|
||||||
video_url = video_url.replace(
|
video_url = video_url.replace(
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
@ -23,7 +24,7 @@ class NhkBaseIE(InfoExtractor):
|
|||||||
def _extract_episode_info(self, url, episode=None):
|
def _extract_episode_info(self, url, episode=None):
|
||||||
fetch_episode = episode is None
|
fetch_episode = episode is None
|
||||||
lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
|
lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
|
||||||
if episode_id.isdigit():
|
if len(episode_id) == 7:
|
||||||
episode_id = episode_id[:4] + '-' + episode_id[4:]
|
episode_id = episode_id[:4] + '-' + episode_id[4:]
|
||||||
|
|
||||||
is_video = m_type == 'video'
|
is_video = m_type == 'video'
|
||||||
@ -84,7 +85,8 @@ class NhkBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class NhkVodIE(NhkBaseIE):
|
class NhkVodIE(NhkBaseIE):
|
||||||
_VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
|
# the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], eg
|
||||||
|
_VALID_URL = r'%s%s(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
|
||||||
# Content available only for a limited period of time. Visit
|
# Content available only for a limited period of time. Visit
|
||||||
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -124,6 +126,19 @@ class NhkVodIE(NhkBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
|
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# video, alphabetic character in ID #29670
|
||||||
|
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
|
||||||
|
'only_matching': True,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'qfjay6cg',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'DESIGN TALKS plus - Fishermen’s Finery',
|
||||||
|
'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
|
||||||
|
'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
|
||||||
|
'upload_date': '20210615',
|
||||||
|
'timestamp': 1623722008,
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -5,15 +5,16 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urlparse,
|
|
||||||
compat_str,
|
compat_str,
|
||||||
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
|
HEADRequest,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
remove_start,
|
remove_start,
|
||||||
@ -96,12 +97,100 @@ class RaiBaseIE(InfoExtractor):
|
|||||||
if not formats and geoprotection is True:
|
if not formats and geoprotection is True:
|
||||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||||
|
|
||||||
|
formats.extend(self._create_http_urls(relinker_url, formats))
|
||||||
|
|
||||||
return dict((k, v) for k, v in {
|
return dict((k, v) for k, v in {
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}.items() if v is not None)
|
}.items() if v is not None)
|
||||||
|
|
||||||
|
def _create_http_urls(self, relinker_url, fmts):
    """Build direct HTTPS MP4 formats for a relinker URL.

    :param relinker_url: relinker URL the formats in ``fmts`` came from.
    :param fmts: list of already extracted format dicts; used to copy
        width/height/codec/fps information onto the MP4 variants.
    :return: list of format dicts (possibly empty), one per quality
        advertised in the resolved relinker URL, or a single ``'*'``
        entry when the URL carries no quality list.
    """
    _RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
    _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
    _QUALITY = {
        # tbr: w, h
        '250': [352, 198],
        '400': [512, 288],
        '700': [512, 288],
        '800': [700, 394],
        '1200': [736, 414],
        '1800': [1024, 576],
        '2400': [1280, 720],
        '3200': [1440, 810],
        '3600': [1440, 810],
        '5000': [1920, 1080],
        '10000': [1920, 1080],
    }

    def test_url(url):
        """Probe ``url`` with a HEAD request.

        Returns the final URL when the response is 200 and differs from
        ``url``, ``False`` when the request failed or the URL did not
        change, and ``None`` for any other status code.
        """
        resp = self._request_webpage(
            HEADRequest(url), None, headers={'User-Agent': 'Rai'},
            fatal=False, errnote=False, note=False)

        if resp is False:
            return False

        if resp.code == 200:
            return False if resp.url == url else resp.url
        return None

    def get_format_info(tbr):
        """Return width/height/tbr/format_id (and codecs, if known) for a quality."""
        br = int_or_none(tbr)
        if len(fmts) == 1 and not br:
            br = fmts[0].get('tbr')

        # br is None for the '*' quality when no usable fallback bitrate
        # exists; comparing None with an int raises TypeError on Python 3.
        # Floor division keeps the bucket an integer: math.floor() returns
        # a float on Python 2, which would produce keys such as '1800.0'.
        if br is not None and br > 300:
            tbr = compat_str(int(br // 100) * 100)
        else:
            tbr = '250'

        # try extracting info from available m3u8 formats
        format_copy = None
        if br is not None:
            br_limit = int(br // 100)
            for f in fmts:
                if not f.get('tbr'):
                    continue
                # the last format within +/-100 of the target bitrate wins
                if br_limit - 1 <= int(f['tbr'] // 100) <= br_limit + 1:
                    format_copy = f.copy()

        if format_copy:
            return {
                'width': format_copy.get('width'),
                'height': format_copy.get('height'),
                'tbr': format_copy.get('tbr'),
                'vcodec': format_copy.get('vcodec'),
                'acodec': format_copy.get('acodec'),
                'fps': format_copy.get('fps'),
                'format_id': 'https-%s' % tbr,
            }

        # Bitrates missing from the table (e.g. '1500') yield an unknown
        # resolution instead of a KeyError.
        width, height = _QUALITY.get(tbr, (None, None))
        return {
            'width': width,
            'height': height,
            'format_id': 'https-%s' % tbr,
            'tbr': int(tbr),
        }

    # The result is only used as an availability probe: without a redirect
    # target for the MP4 rule no direct formats are produced.
    loc = test_url(_MP4_TMPL % (relinker_url, '*'))
    if not isinstance(loc, compat_str):
        return []

    mobj = re.match(
        _RELINKER_REG,
        test_url(relinker_url) or '')
    if not mobj:
        return []

    available_qualities = mobj.group('quality').split(',') if mobj.group('quality') else ['*']
    available_qualities = [i for i in available_qualities if i]

    formats = []
    for q in available_qualities:
        fmt = {
            'url': _MP4_TMPL % (relinker_url, q),
            'protocol': 'https',
            'ext': 'mp4',
        }
        fmt.update(get_format_info(q))
        formats.append(fmt)
    return formats
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_subtitles(url, video_data):
|
def _extract_subtitles(url, video_data):
|
||||||
STL_EXT = 'stl'
|
STL_EXT = 'stl'
|
||||||
@ -151,6 +240,22 @@ class RaiPlayIE(RaiBaseIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# 1080p direct mp4 url
|
||||||
|
'url': 'https://www.raiplay.it/video/2021/03/Leonardo-S1E1-b5703b02-82ee-475a-85b6-c9e4a8adf642.html',
|
||||||
|
'md5': '2e501e8651d72f05ffe8f5d286ad560b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b5703b02-82ee-475a-85b6-c9e4a8adf642',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Leonardo - S1E1',
|
||||||
|
'alt_title': 'St 1 Ep 1 - Episodio 1',
|
||||||
|
'description': 'md5:f5360cd267d2de146e4e3879a5a47d31',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'uploader': 'Rai 1',
|
||||||
|
'duration': 3229,
|
||||||
|
'series': 'Leonardo',
|
||||||
|
'season': 'Season 1',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
|
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -158,6 +263,10 @@ class RaiPlayIE(RaiBaseIE):
|
|||||||
# subtitles at 'subtitlesArray' key (see #27698)
|
# subtitles at 'subtitlesArray' key (see #27698)
|
||||||
'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
|
'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# DRM protected
|
||||||
|
'url': 'https://www.raiplay.it/video/2020/09/Lo-straordinario-mondo-di-Zoey-S1E1-Lo-straordinario-potere-di-Zoey-ed493918-1d32-44b7-8454-862e473d00ff.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -166,6 +275,13 @@ class RaiPlayIE(RaiBaseIE):
|
|||||||
media = self._download_json(
|
media = self._download_json(
|
||||||
base + '.json', video_id, 'Downloading video JSON')
|
base + '.json', video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
|
if try_get(
|
||||||
|
media,
|
||||||
|
(lambda x: x['rights_management']['rights']['drm'],
|
||||||
|
lambda x: x['program_info']['rights_management']['rights']['drm']),
|
||||||
|
dict):
|
||||||
|
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||||
|
|
||||||
title = media['name']
|
title = media['name']
|
||||||
|
|
||||||
video = media['video']
|
video = media['video']
|
||||||
@ -307,7 +423,7 @@ class RaiIE(RaiBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
# with ContentItem in og:url
|
# with ContentItem in og:url
|
||||||
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
|
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
|
||||||
'md5': '6865dd00cf0bbf5772fdd89d59bd768a',
|
'md5': '06345bd97c932f19ffb129973d07a020',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
|
'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -62,7 +62,7 @@ class StreamCZIE(InfoExtractor):
|
|||||||
if not stream.get('url'):
|
if not stream.get('url'):
|
||||||
continue
|
continue
|
||||||
yield merge_dicts({
|
yield merge_dicts({
|
||||||
'format_id': '{}-{}'.format(format_id, ext),
|
'format_id': '-'.join((format_id, ext)),
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'source_preference': pref,
|
'source_preference': pref,
|
||||||
'url': urljoin(spl_url, stream['url']),
|
'url': urljoin(spl_url, stream['url']),
|
||||||
|
@ -271,7 +271,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
)?
|
)?
|
||||||
vimeo(?:pro)?\.com/
|
vimeo(?:pro)?\.com/
|
||||||
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
||||||
(?:.*?/)?
|
(?:.*?/)??
|
||||||
(?:
|
(?:
|
||||||
(?:
|
(?:
|
||||||
play_redirect_hls|
|
play_redirect_hls|
|
||||||
@ -517,14 +517,28 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
'url': 'https://vimeo.com/7809605',
|
'url': 'https://vimeo.com/7809605',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
'url': 'https://vimeo.com/160743502/abd0e13fb4',
|
|
||||||
'only_matching': True,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
# requires passing unlisted_hash(a52724358e) to load_download_config request
|
# requires passing unlisted_hash(a52724358e) to load_download_config request
|
||||||
'url': 'https://vimeo.com/392479337/a52724358e',
|
'url': 'https://vimeo.com/392479337/a52724358e',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# similar, but all numeric: ID must be 581039021, not 9603038895
|
||||||
|
# issue #29690
|
||||||
|
'url': 'https://vimeo.com/581039021/9603038895',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '581039021',
|
||||||
|
# these have to be provided but we don't care
|
||||||
|
'ext': 'mp4',
|
||||||
|
'timestamp': 1627621014,
|
||||||
|
'title': 're:.+',
|
||||||
|
'uploader_id': 're:.+',
|
||||||
|
'uploader': 're:.+',
|
||||||
|
'upload_date': r're:\d+',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
# https://gettingthingsdone.com/workflowmap/
|
# https://gettingthingsdone.com/workflowmap/
|
||||||
# vimeo embed with check-password page protected by Referer header
|
# vimeo embed with check-password page protected by Referer header
|
||||||
|
@ -57,7 +57,7 @@ class WatIE(InfoExtractor):
|
|||||||
# 'http://www.wat.tv/interface/contentv4s/' + video_id, video_id)
|
# 'http://www.wat.tv/interface/contentv4s/' + video_id, video_id)
|
||||||
video_data = self._download_json(
|
video_data = self._download_json(
|
||||||
'https://mediainfo.tf1.fr/mediainfocombo/' + video_id,
|
'https://mediainfo.tf1.fr/mediainfocombo/' + video_id,
|
||||||
video_id, query={'context': 'MYTF1'})
|
video_id, query={'context': 'MYTF1', 'pver': '4001000'})
|
||||||
video_info = video_data['media']
|
video_info = video_data['media']
|
||||||
|
|
||||||
error_desc = video_info.get('error_desc')
|
error_desc = video_info.get('error_desc')
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
@ -137,9 +138,10 @@ class YouPornIE(InfoExtractor):
|
|||||||
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
|
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
upload_date = unified_strdate(self._html_search_regex(
|
||||||
[r'UPLOADED:\s*<span>([^<]+)',
|
(r'UPLOADED:\s*<span>([^<]+)',
|
||||||
r'Date\s+[Aa]dded:\s*<span>([^<]+)',
|
r'Date\s+[Aa]dded:\s*<span>([^<]+)',
|
||||||
r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
|
r'''(?s)<div[^>]+class=["']videoInfo(?:Date|Time)\b[^>]*>(.+?)</div>''',
|
||||||
|
r'(?s)<label\b[^>]*>Uploaded[^<]*</label>\s*<span\b[^>]*>(.+?)</span>'),
|
||||||
webpage, 'upload date', fatal=False))
|
webpage, 'upload date', fatal=False))
|
||||||
|
|
||||||
age_limit = self._rta_search(webpage)
|
age_limit = self._rta_search(webpage)
|
||||||
|
@ -1464,16 +1464,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
# 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
|
# 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
|
||||||
# 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
|
# 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
|
||||||
def _extract_n_function_name(self, jscode):
    """Return the name of the player JS function applied to the "n" parameter.

    :param jscode: source text of the player JavaScript.
    :return: the function name as a string.

    The call site has the form ``.get("n"))&&(b=NAME(x)`` or
    ``.get("n"))&&(b=NAME[IDX](x)``. In the indexed form ``NAME`` is an
    array declared as ``var NAME = [...]`` and the wanted name is its
    ``IDX``-th element.
    """
    target = r'(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?'
    nfunc_and_idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(%s)\([\w$]+\)' % (target, ),
        jscode, 'Initial JS player n function name')
    nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
    if not idx:
        return nfunc

    if int_or_none(idx) == 0:
        # Fast path: a single-element array aliasing the real function.
        # default=None makes a miss non-fatal, so that the generic list
        # parsing below is still tried instead of raising here.
        real_nfunc = self._search_regex(
            r'var %s\s*=\s*\[([a-zA-Z_$][\w$]*)\];' % (re.escape(nfunc), ), jscode,
            'Initial JS player n function alias ({nfunc}[{idx}])'.format(nfunc=nfunc, idx=idx),
            default=None)
        if real_nfunc:
            return real_nfunc

    # General case: parse the whole array literal and index into it.
    return self._parse_json(self._search_regex(
        r'var %s\s*=\s*(\[.+?\]);' % (re.escape(nfunc), ), jscode,
        'Initial JS player n function name ({nfunc}[{idx}])'.format(nfunc=nfunc, idx=idx)),
        nfunc, transform_source=js_to_json)[int(idx)]
|
||||||
|
|
||||||
def _extract_n_function(self, video_id, player_url):
|
def _extract_n_function(self, video_id, player_url):
|
||||||
player_id = self._extract_player_info(player_url)
|
player_id = self._extract_player_info(player_url)
|
||||||
@ -1482,7 +1488,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if func_code:
|
if func_code:
|
||||||
jsi = JSInterpreter(func_code)
|
jsi = JSInterpreter(func_code)
|
||||||
else:
|
else:
|
||||||
player_id = self._extract_player_info(player_url)
|
|
||||||
jscode = self._get_player_code(video_id, player_url, player_id)
|
jscode = self._get_player_code(video_id, player_url, player_id)
|
||||||
funcname = self._extract_n_function_name(jscode)
|
funcname = self._extract_n_function_name(jscode)
|
||||||
jsi = JSInterpreter(jscode)
|
jsi = JSInterpreter(jscode)
|
||||||
|
@ -270,11 +270,11 @@ def parseOpts(overrideArguments=None):
|
|||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--match-title',
|
'--match-title',
|
||||||
dest='matchtitle', metavar='REGEX',
|
dest='matchtitle', metavar='REGEX',
|
||||||
help='Download only matching titles (regex or caseless sub-string)')
|
help='Download only matching titles (case-insensitive regex or sub-string)')
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--reject-title',
|
'--reject-title',
|
||||||
dest='rejecttitle', metavar='REGEX',
|
dest='rejecttitle', metavar='REGEX',
|
||||||
help='Skip download for matching titles (regex or caseless sub-string)')
|
help='Skip download for matching titles (case-insensitive regex or sub-string)')
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--max-downloads',
|
'--max-downloads',
|
||||||
dest='max_downloads', metavar='NUMBER', type=int, default=None,
|
dest='max_downloads', metavar='NUMBER', type=int, default=None,
|
||||||
|
@ -2151,7 +2151,7 @@ def sanitize_url(url):
|
|||||||
for mistake, fixup in COMMON_TYPOS:
|
for mistake, fixup in COMMON_TYPOS:
|
||||||
if re.match(mistake, url):
|
if re.match(mistake, url):
|
||||||
return re.sub(mistake, fixup, url)
|
return re.sub(mistake, fixup, url)
|
||||||
return url
|
return escape_url(url)
|
||||||
|
|
||||||
|
|
||||||
def sanitized_Request(url, *args, **kwargs):
|
def sanitized_Request(url, *args, **kwargs):
|
||||||
@ -2292,12 +2292,30 @@ def formatSeconds(secs):
|
|||||||
|
|
||||||
|
|
||||||
def make_HTTPS_handler(params, **kwargs):
|
def make_HTTPS_handler(params, **kwargs):
|
||||||
|
|
||||||
|
# https://www.rfc-editor.org/info/rfc7301
|
||||||
|
ALPN_PROTOCOLS = ['http/1.1']
|
||||||
|
|
||||||
|
def set_alpn_protocols(ctx):
|
||||||
|
# From https://github.com/yt-dlp/yt-dlp/commit/2c6dcb65fb612fc5bc5c61937bf438d3c473d8d0
|
||||||
|
# Thanks @coletdjnz
|
||||||
|
# Some servers may (wrongly) reject requests if ALPN extension is not sent. See:
|
||||||
|
# https://github.com/python/cpython/issues/85140
|
||||||
|
# https://github.com/yt-dlp/yt-dlp/issues/3878
|
||||||
|
try:
|
||||||
|
ctx.set_alpn_protocols(ALPN_PROTOCOLS)
|
||||||
|
except (AttributeError, NotImplementedError):
|
||||||
|
# Python < 2.7.10, not ssl.HAS_ALPN
|
||||||
|
pass
|
||||||
|
|
||||||
opts_no_check_certificate = params.get('nocheckcertificate', False)
|
opts_no_check_certificate = params.get('nocheckcertificate', False)
|
||||||
if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
|
if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
|
||||||
context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
|
context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
|
||||||
|
set_alpn_protocols(context)
|
||||||
if opts_no_check_certificate:
|
if opts_no_check_certificate:
|
||||||
context.check_hostname = False
|
context.check_hostname = False
|
||||||
context.verify_mode = ssl.CERT_NONE
|
context.verify_mode = ssl.CERT_NONE
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
|
return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
|
||||||
except TypeError:
|
except TypeError:
|
||||||
@ -2313,6 +2331,7 @@ def make_HTTPS_handler(params, **kwargs):
|
|||||||
if opts_no_check_certificate
|
if opts_no_check_certificate
|
||||||
else ssl.CERT_REQUIRED)
|
else ssl.CERT_REQUIRED)
|
||||||
context.set_default_verify_paths()
|
context.set_default_verify_paths()
|
||||||
|
set_alpn_protocols(context)
|
||||||
return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
|
return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user