diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 058eb4321..4b35244a8 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@
---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.05*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.05**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.12.03*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.12.03**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2018.10.05
+[debug] youtube-dl version 2018.12.03
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
diff --git a/.travis.yml b/.travis.yml
index 92f326860..79287ccf6 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -15,6 +15,18 @@ env:
- YTDL_TEST_SET=download
matrix:
include:
+ - python: 3.7
+ dist: xenial
+ env: YTDL_TEST_SET=core
+ - python: 3.7
+ dist: xenial
+ env: YTDL_TEST_SET=download
+ - python: 3.8-dev
+ dist: xenial
+ env: YTDL_TEST_SET=core
+ - python: 3.8-dev
+ dist: xenial
+ env: YTDL_TEST_SET=download
- env: JYTHON=true; YTDL_TEST_SET=core
- env: JYTHON=true; YTDL_TEST_SET=download
fast_finish: true
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 333acee80..bbcb78808 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -296,5 +296,26 @@ title = self._search_regex(
### Use safe conversion functions
-Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+
+Use `url_or_none` for safe URL processing.
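+
+As a minimal illustrative sketch (the `video` dict and the `thumbnailUrl`/`sources`/`src` keys below are placeholders for whatever the parsed JSON actually contains, not fields of any specific site):
+
+```python
+# url_or_none returns the string only if it looks like a valid (or protocol-relative)
+# URL; None, empty strings and garbage values all become None
+thumbnail = url_or_none(video.get('thumbnailUrl'))
+
+formats = []
+for source in video.get('sources') or []:
+    source_url = url_or_none(source.get('src'))
+    if not source_url:
+        continue
+    formats.append({'url': source_url})
+```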
+
+Use `try_get` for safe metadata extraction from parsed JSON.
+
+Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
+
+#### More examples
+
+##### Safely extract optional description from parsed JSON
+```python
+description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
+```
+
+##### Safely extract more optional metadata
+```python
+video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
+description = video.get('summary')
+duration = float_or_none(video.get('durationMs'), scale=1000)
+view_count = int_or_none(video.get('views'))
+```
diff --git a/ChangeLog b/ChangeLog
index 86cf489b1..689d07826 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,115 @@
+version 2018.12.03
+
+Core
+* [utils] Fix random_birthday to generate existing dates only (#18284)
+
+Extractors
++ [tiktok] Add support for tiktok.com (#18108, #18135)
+* [pornhub] Use actual URL host for requests (#18359)
+* [lynda] Fix authentication (#18158, #18217)
+* [gfycat] Update API endpoint (#18333, #18343)
++ [hotstar] Add support for alternative app state layout (#18320)
+* [azmedien] Fix extraction (#18334, #18336)
++ [vimeo] Add support for VHX (Vimeo OTT) (#14835)
+* [joj] Fix extraction (#18280, #18281)
++ [wistia] Add support for fast.wistia.com (#18287)
+
+
+version 2018.11.23
+
+Core
++ [setup.py] Add more relevant classifiers
+
+Extractors
+* [mixcloud] Fallback to hardcoded decryption key (#18016)
+* [nbc:news] Fix article extraction (#16194)
+* [foxsports] Fix extraction (#17543)
+* [loc] Relax regular expression and improve formats extraction
++ [ciscolive] Add support for ciscolive.cisco.com (#17984)
+* [nzz] Relax kaltura regex (#18228)
+* [sixplay] Fix formats extraction
+* [bitchute] Improve title extraction
+* [kaltura] Limit requested MediaEntry fields
++ [americastestkitchen] Add support for zype embeds (#18225)
++ [pornhub] Add pornhub.net alias
+* [nova:embed] Fix extraction (#18222)
+
+
+version 2018.11.18
+
+Extractors
++ [wwe] Extract subtitles
++ [wwe] Add support for playlists (#14781)
++ [wwe] Add support for wwe.com (#14781, #17450)
+* [vk] Detect geo restriction (#17767)
+* [openload] Use original host during extraction (#18211)
+* [atvat] Fix extraction (#18041)
++ [rte] Add support for new API endpoint (#18206)
+* [tnaflixnetwork:embed] Fix extraction (#18205)
+* [picarto] Use API and add token support (#16518)
++ [zype] Add support for player.zype.com (#18143)
+* [vivo] Fix extraction (#18139)
+* [ruutu] Update API endpoint (#18138)
+
+
+version 2018.11.07
+
+Extractors
++ [youtube] Add another JS signature function name regex (#18091, #18093,
+ #18094)
+* [facebook] Fix tahoe request (#17171)
+* [cliphunter] Fix extraction (#18083)
++ [youtube:playlist] Add support for invidio.us (#18077)
+* [zattoo] Arrange API hosts for derived extractors (#18035)
++ [youtube] Add fallback metadata extraction from videoDetails (#18052)
+
+
+version 2018.11.03
+
+Core
+* [extractor/common] Ensure response handle is not prematurely closed before
+ it can be read if it matches expected_status (#17195, #17846, #17447)
+
+Extractors
+* [laola1tv:embed] Set correct stream access URL scheme (#16341)
++ [ehftv] Add support for ehftv.com (#15408)
+* [azmedien] Adapt to major site redesign (#17745, #17746)
++ [twitcasting] Add support for twitcasting.tv (#17981)
+* [orf:tvthek] Fix extraction (#17737, #17956, #18024)
++ [openload] Add support for oload.fun (#18045)
+* [njpwworld] Fix authentication (#17427)
++ [linkedin:learning] Add support for linkedin.com/learning (#13545)
+* [theplatform] Improve error detection (#13222)
+* [cnbc] Simplify extraction (#14280, #17110)
++ [cnbc] Add support for new URL schema (#14193)
+* [aparat] Improve extraction and extract more metadata (#17445, #18008)
+* [aparat] Fix extraction
+
+
+version 2018.10.29
+
+Core
++ [extractor/common] Add validation for JSON-LD URLs
+
+Extractors
++ [sportbox] Add support for matchtv.ru
+* [sportbox] Fix extraction (#17978)
+* [screencast] Fix extraction (#14590, #14617, #17990)
++ [openload] Add support for oload.icu
++ [ivi] Add support for ivi.tv
+* [crunchyroll] Make extraction more fail-safe (#17991)
+* [dailymail] Fix formats extraction (#17976)
+* [viewster] Reduce format requests
+* [cwtv] Handle API errors (#17905)
++ [rutube] Use geo verification headers (#17897)
++ [brightcove:legacy] Add fallbacks to brightcove:new (#13912)
+- [tv3] Remove extractor (#10461, #15339)
+* [ted] Fix extraction for HTTP and RTMP formats (#5941, #17572, #17894)
++ [openload] Add support for oload.cc (#17823)
++ [patreon] Extract post_file URL (#17792)
+* [patreon] Fix extraction (#14502, #10471)
+
+
version 2018.10.05
Extractors
diff --git a/README.md b/README.md
index fdd115c9b..35c3de512 100644
--- a/README.md
+++ b/README.md
@@ -1168,7 +1168,28 @@ title = self._search_regex(
### Use safe conversion functions
-Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+
+Use `url_or_none` for safe URL processing.
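+
+As a minimal illustrative sketch (the `video` dict and the `thumbnailUrl`/`sources`/`src` keys below are placeholders for whatever the parsed JSON actually contains, not fields of any specific site):
+
+```python
+# url_or_none returns the string only if it looks like a valid (or protocol-relative)
+# URL; None, empty strings and garbage values all become None
+thumbnail = url_or_none(video.get('thumbnailUrl'))
+
+formats = []
+for source in video.get('sources') or []:
+    source_url = url_or_none(source.get('src'))
+    if not source_url:
+        continue
+    formats.append({'url': source_url})
+```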
+
+Use `try_get` for safe metadata extraction from parsed JSON.
+
+Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
+
+#### More examples
+
+##### Safely extract optional description from parsed JSON
+```python
+description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
+```
+
+##### Safely extract more optional metadata
+```python
+video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
+description = video.get('summary')
+duration = float_or_none(video.get('durationMs'), scale=1000)
+view_count = int_or_none(video.get('views'))
+```
# EMBEDDING YOUTUBE-DL
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index f167a6ddc..837b0199b 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -84,8 +84,6 @@
- **awaan:season**
- **awaan:video**
- **AZMedien**: AZ Medien videos
- - **AZMedienPlaylist**: AZ Medien playlists
- - **AZMedienShowPlaylist**: AZ Medien show playlists
- **BaiduVideo**: 百度视频
- **bambuser**
- **bambuser:channel**
@@ -165,6 +163,8 @@
- **chirbit**
- **chirbit:profile**
- **Cinchcast**
+ - **CiscoLiveSearch**
+ - **CiscoLiveSession**
- **CJSW**
- **cliphunter**
- **Clippit**
@@ -178,6 +178,7 @@
- **Clyp**
- **cmt.com**
- **CNBC**
+ - **CNBCVideo**
- **CNN**
- **CNNArticle**
- **CNNBlogs**
@@ -251,6 +252,7 @@
- **EchoMsk**
- **egghead:course**: egghead.io course
- **egghead:lesson**: egghead.io lesson
+ - **ehftv**
- **eHow**
- **EinsUndEinsTV**
- **Einthusan**
@@ -445,6 +447,8 @@
- **limelight:channel**
- **limelight:channel_list**
- **LineTV**
+ - **linkedin:learning**
+ - **linkedin:learning:course**
- **LiTV**
- **LiveLeak**
- **LiveLeakEmbed**
@@ -818,7 +822,7 @@
- **Spiegeltv**
- **sport.francetvinfo.fr**
- **Sport5**
- - **SportBoxEmbed**
+ - **SportBox**
- **SportDeutschland**
- **SpringboardPlatform**
- **Sprout**
@@ -881,6 +885,8 @@
- **ThisAmericanLife**
- **ThisAV**
- **ThisOldHouse**
+ - **TikTok**
+ - **TikTokUser**
- **tinypic**: tinypic.com videos
- **TMZ**
- **TMZArticle**
@@ -909,7 +915,6 @@
- **TV2**
- **tv2.hu**
- **TV2Article**
- - **TV3**
- **TV4**: tv4.se and tv4play.se
- **TV5MondePlus**: TV5MONDE+
- **TVA**
@@ -931,6 +936,7 @@
- **TVPlayer**
- **TVPlayHome**
- **Tweakers**
+ - **TwitCasting**
- **twitch:chapter**
- **twitch:clips**
- **twitch:profile**
@@ -975,6 +981,7 @@
- **VevoPlaylist**
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
- **vh1.com**
+ - **vhx:embed**
- **Viafree**
- **vice**
- **vice:article**
@@ -1078,6 +1085,7 @@
- **wrzuta.pl:playlist**
- **WSJ**: Wall Street Journal
- **WSJArticle**
+ - **WWE**
- **XBef**
- **XboxClips**
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
@@ -1137,3 +1145,4 @@
- **ZDF**
- **ZDFChannel**
- **zingmp3**: mp3.zing.vn
+ - **Zype**
diff --git a/setup.py b/setup.py
index 7dbb5805f..dfb669ad2 100644
--- a/setup.py
+++ b/setup.py
@@ -124,6 +124,8 @@ setup(
'Development Status :: 5 - Production/Stable',
'Environment :: Console',
'License :: Public Domain',
+ 'Programming Language :: Python',
+ 'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
@@ -132,6 +134,13 @@ setup(
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
+ 'Programming Language :: Python :: 3.7',
+ 'Programming Language :: Python :: 3.8',
+ 'Programming Language :: Python :: Implementation',
+ 'Programming Language :: Python :: Implementation :: CPython',
+ 'Programming Language :: Python :: Implementation :: IronPython',
+ 'Programming Language :: Python :: Implementation :: Jython',
+ 'Programming Language :: Python :: Implementation :: PyPy',
],
cmdclass={'build_lazy_extractors': build_lazy_extractors},
diff --git a/test/helper.py b/test/helper.py
index dfee217a9..aa9a1c9b2 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -7,6 +7,7 @@ import json
import os.path
import re
import types
+import ssl
import sys
import youtube_dl.extractor
@@ -244,3 +245,12 @@ def expect_warnings(ydl, warnings_re):
real_warning(w)
ydl.report_warning = _report_warning
+
+
+def http_server_port(httpd):
+ if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
+ # In Jython SSLSocket is not a subclass of socket.socket
+ sock = httpd.socket.sock
+ else:
+ sock = httpd.socket
+ return sock.getsockname()[1]
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 4833396a5..06be72616 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -9,11 +9,30 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import FakeYDL, expect_dict, expect_value
-from youtube_dl.compat import compat_etree_fromstring
+from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
+from youtube_dl.compat import compat_etree_fromstring, compat_http_server
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor import YoutubeIE, get_info_extractor
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
+import threading
+
+
+TEAPOT_RESPONSE_STATUS = 418
+TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
+
+
+class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+ def log_message(self, format, *args):
+ pass
+
+ def do_GET(self):
+ if self.path == '/teapot':
+ self.send_response(TEAPOT_RESPONSE_STATUS)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.end_headers()
+ self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
+ else:
+ assert False
class TestIE(InfoExtractor):
@@ -743,6 +762,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
for i in range(len(entries)):
expect_dict(self, entries[i], expected_entries[i])
+ def test_response_with_expected_status_returns_content(self):
+        # Checks for mitigations against the effects of a CPython bug
+        # affecting Python 3.4.1+, which manifest as `_download_webpage`,
+        # `_download_xml`, `_download_json`, or the underlying
+        # `_download_webpage_handle` returning no content when a response
+        # matches `expected_status`.
+
+ httpd = compat_http_server.HTTPServer(
+ ('127.0.0.1', 0), InfoExtractorTestRequestHandler)
+ port = http_server_port(httpd)
+ server_thread = threading.Thread(target=httpd.serve_forever)
+ server_thread.daemon = True
+ server_thread.start()
+
+ (content, urlh) = self.ie._download_webpage_handle(
+ 'http://127.0.0.1:%d/teapot' % port, None,
+ expected_status=TEAPOT_RESPONSE_STATUS)
+ self.assertEqual(content, TEAPOT_RESPONSE_BODY)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_compat.py b/test/test_compat.py
index d6c54e135..51fe6aa0b 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -39,7 +39,7 @@ class TestCompat(unittest.TestCase):
def test_compat_expanduser(self):
old_home = os.environ.get('HOME')
- test_str = 'C:\Documents and Settings\тест\Application Data'
+ test_str = r'C:\Documents and Settings\тест\Application Data'
compat_setenv('HOME', test_str)
self.assertEqual(compat_expanduser('~'), test_str)
compat_setenv('HOME', old_home or '')
diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py
index 5cf2bf1a5..750472281 100644
--- a/test/test_downloader_http.py
+++ b/test/test_downloader_http.py
@@ -9,26 +9,16 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import try_rm
+from test.helper import http_server_port, try_rm
from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_http_server
from youtube_dl.downloader.http import HttpFD
from youtube_dl.utils import encodeFilename
-import ssl
import threading
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-def http_server_port(httpd):
- if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
- # In Jython SSLSocket is not a subclass of socket.socket
- sock = httpd.socket.sock
- else:
- sock = httpd.socket
- return sock.getsockname()[1]
-
-
TEST_SIZE = 10 * 1024
diff --git a/test/test_http.py b/test/test_http.py
index 409fec9c8..3ee0a5dda 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -8,6 +8,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from test.helper import http_server_port
from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_http_server, compat_urllib_request
import ssl
@@ -16,15 +17,6 @@ import threading
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-def http_server_port(httpd):
- if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
- # In Jython SSLSocket is not a subclass of socket.socket
- sock = httpd.socket.sock
- else:
- sock = httpd.socket
- return sock.getsockname()[1]
-
-
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
def log_message(self, format, *args):
pass
diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py
index addb69d6f..4209d1d9a 100644
--- a/test/test_postprocessors.py
+++ b/test/test_postprocessors.py
@@ -14,4 +14,4 @@ from youtube_dl.postprocessor import MetadataFromTitlePP
class TestMetadataFromTitle(unittest.TestCase):
def test_format_to_regex(self):
pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
-        self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)')
+        self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')
diff --git a/youtube_dl/extractor/americastestkitchen.py b/youtube_dl/extractor/americastestkitchen.py
index 01736872d..8b32aa886 100644
--- a/youtube_dl/extractor/americastestkitchen.py
+++ b/youtube_dl/extractor/americastestkitchen.py
@@ -43,10 +43,6 @@ class AmericasTestKitchenIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
- partner_id = self._search_regex(
- r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
- webpage, 'kaltura partner id')
-
video_data = self._parse_json(
self._search_regex(
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*',
@@ -58,7 +54,18 @@ class AmericasTestKitchenIE(InfoExtractor):
(lambda x: x['episodeDetail']['content']['data'],
lambda x: x['videoDetail']['content']['data']), dict)
ep_meta = ep_data.get('full_video', {})
- external_id = ep_data.get('external_id') or ep_meta['external_id']
+
+ zype_id = ep_meta.get('zype_id')
+ if zype_id:
+ embed_url = 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id
+ ie_key = 'Zype'
+ else:
+ partner_id = self._search_regex(
+ r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
+ webpage, 'kaltura partner id')
+ external_id = ep_data.get('external_id') or ep_meta['external_id']
+ embed_url = 'kaltura:%s:%s' % (partner_id, external_id)
+ ie_key = 'Kaltura'
title = ep_data.get('title') or ep_meta.get('title')
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
@@ -72,8 +79,8 @@ class AmericasTestKitchenIE(InfoExtractor):
return {
'_type': 'url_transparent',
- 'url': 'kaltura:%s:%s' % (partner_id, external_id),
- 'ie_key': 'Kaltura',
+ 'url': embed_url,
+ 'ie_key': ie_key,
'title': title,
'description': description,
'thumbnail': thumbnail,
diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py
index 6eb8bbb6e..883dcee7a 100644
--- a/youtube_dl/extractor/aparat.py
+++ b/youtube_dl/extractor/aparat.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
+ merge_dicts,
mimetype2ext,
url_or_none,
)
@@ -12,59 +13,83 @@ from ..utils import (
class AparatIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.aparat.com/v/wP8On',
'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
'info_dict': {
'id': 'wP8On',
'ext': 'mp4',
'title': 'تیم گلکسی 11 - زومیت',
- 'age_limit': 0,
+ 'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028',
+ 'duration': 231,
+ 'timestamp': 1387394859,
+ 'upload_date': '20131218',
+ 'view_count': int,
},
- # 'skip': 'Extremely unreliable',
- }
+ }, {
+ # multiple formats
+ 'url': 'https://www.aparat.com/v/8dflw/',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
- # Note: There is an easier-to-parse configuration at
- # http://www.aparat.com/video/video/config/videohash/%video_id
- # but the URL in there does not work
- webpage = self._download_webpage(
- 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
- video_id)
+ # Provides more metadata
+ webpage = self._download_webpage(url, video_id, fatal=False)
- title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
+ if not webpage:
+ # Note: There is an easier-to-parse configuration at
+ # http://www.aparat.com/video/video/config/videohash/%video_id
+ # but the URL in there does not work
+ webpage = self._download_webpage(
+ 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
+ video_id)
- file_list = self._parse_json(
+ options = self._parse_json(
self._search_regex(
- r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage,
- 'file list'),
+                r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
+ webpage, 'options', group='value'),
video_id)
+ player = options['plugins']['sabaPlayerPlugin']
+
formats = []
- for item in file_list[0]:
- file_url = url_or_none(item.get('file'))
- if not file_url:
- continue
- ext = mimetype2ext(item.get('type'))
- label = item.get('label')
- formats.append({
- 'url': file_url,
- 'ext': ext,
- 'format_id': label or ext,
- 'height': int_or_none(self._search_regex(
- r'(\d+)[pP]', label or '', 'height', default=None)),
- })
- self._sort_formats(formats)
+ for sources in player['multiSRC']:
+ for item in sources:
+ if not isinstance(item, dict):
+ continue
+ file_url = url_or_none(item.get('src'))
+ if not file_url:
+ continue
+ item_type = item.get('type')
+ if item_type == 'application/vnd.apple.mpegurl':
+ formats.extend(self._extract_m3u8_formats(
+ file_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ else:
+ ext = mimetype2ext(item.get('type'))
+ label = item.get('label')
+ formats.append({
+ 'url': file_url,
+ 'ext': ext,
+ 'format_id': 'http-%s' % (label or ext),
+ 'height': int_or_none(self._search_regex(
+ r'(\d+)[pP]', label or '', 'height',
+ default=None)),
+ })
+ self._sort_formats(
+ formats, field_preference=('height', 'width', 'tbr', 'format_id'))
- thumbnail = self._search_regex(
- r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
+ info = self._search_json_ld(webpage, video_id, default={})
- return {
+ if not info.get('title'):
+ info['title'] = player['title']
+
+ return merge_dicts(info, {
'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'age_limit': self._family_friendly_search(webpage),
+ 'thumbnail': url_or_none(options.get('poster')),
+ 'duration': int_or_none(player.get('duration')),
'formats': formats,
- }
+ })
diff --git a/youtube_dl/extractor/atvat.py b/youtube_dl/extractor/atvat.py
index 1584d53fc..95e572d70 100644
--- a/youtube_dl/extractor/atvat.py
+++ b/youtube_dl/extractor/atvat.py
@@ -28,8 +28,10 @@ class ATVAtIE(InfoExtractor):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_data = self._parse_json(unescapeHTML(self._search_regex(
- r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
- webpage, 'player data')), display_id)['config']['initial_video']
+            [r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
+             r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
+ webpage, 'player data', group='json')),
+ display_id)['config']['initial_video']
video_id = video_data['id']
video_title = video_data['title']
diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py
index 68f26e2ca..fcbdc71b9 100644
--- a/youtube_dl/extractor/azmedien.py
+++ b/youtube_dl/extractor/azmedien.py
@@ -1,213 +1,86 @@
# coding: utf-8
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
from .kaltura import KalturaIE
-from ..utils import (
- get_element_by_class,
- get_element_by_id,
- strip_or_none,
- urljoin,
-)
-class AZMedienBaseIE(InfoExtractor):
- def _kaltura_video(self, partner_id, entry_id):
- return self.url_result(
- 'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
- video_id=entry_id)
-
-
-class AZMedienIE(AZMedienBaseIE):
+class AZMedienIE(InfoExtractor):
IE_DESC = 'AZ Medien videos'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
- (?:
+                        (?P<host>
telezueri\.ch|
telebaern\.tv|
telem1\.ch
)/
- [0-9]+-show-[^/\#]+
- (?:
- /[0-9]+-episode-[^/\#]+
- (?:
- /[0-9]+-segment-(?:[^/\#]+\#)?|
- \#
- )|
- \#
+ [^/]+/
+                        (?P<id>
+                            [^/]+-(?P<article_id>\d+)
)
-                        (?P<id>[^\#]+)
+ (?:
+ \#video=
+                            (?P<kaltura_id>
+ [_0-9a-z]+
+ )
+ )?
'''
_TESTS = [{
- # URL with 'segment'
- 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
+ 'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
'info_dict': {
- 'id': '1_2444peh4',
+ 'id': '1_anruz3wy',
'ext': 'mp4',
- 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
- 'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
- 'uploader_id': 'TeleZ?ri',
- 'upload_date': '20161218',
- 'timestamp': 1482084490,
+ 'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
+ 'uploader_id': 'TVOnline',
+ 'upload_date': '20180930',
+ 'timestamp': 1538328802,
},
'params': {
'skip_download': True,
},
}, {
- # URL with 'segment' and fragment:
- 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
- 'only_matching': True
- }, {
- # URL with 'episode' and fragment:
- 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
- 'only_matching': True
- }, {
- # URL with 'show' and fragment:
- 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
+ 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
'only_matching': True
}]
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- partner_id = self._search_regex(
- r'',
webpage, 'app state'), video_id)
video_data = {}
+ getters = (
+ lambda x, k=k: x['initialState']['content%s' % k]['content']
+ for k in ('Data', 'Detail')
+ )
for v in app_state.values():
- content = try_get(v, lambda x: x['initialState']['contentData']['content'], dict)
+ content = try_get(v, getters, dict)
if content and content.get('contentId') == video_id:
video_data = content
diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py
index cb51cef2d..86c014b07 100644
--- a/youtube_dl/extractor/ivi.py
+++ b/youtube_dl/extractor/ivi.py
@@ -15,7 +15,7 @@ from ..utils import (
class IviIE(InfoExtractor):
IE_DESC = 'ivi.ru'
IE_NAME = 'ivi'
-    _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
_GEO_BYPASS = False
_GEO_COUNTRIES = ['RU']
@@ -65,7 +65,11 @@ class IviIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
},
'skip': 'Only works from Russia',
- }
+ },
+ {
+ 'url': 'https://www.ivi.tv/watch/33560/',
+ 'only_matching': True,
+ },
]
# Sorted by quality
diff --git a/youtube_dl/extractor/joj.py b/youtube_dl/extractor/joj.py
index d9f8dbfd2..62b28e980 100644
--- a/youtube_dl/extractor/joj.py
+++ b/youtube_dl/extractor/joj.py
@@ -61,7 +61,7 @@ class JojIE(InfoExtractor):
bitrates = self._parse_json(
self._search_regex(
- r'(?s)bitrates\s*=\s*({.+?});', webpage, 'bitrates',
+ r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates',
default='{}'),
video_id, transform_source=js_to_json, fatal=False)
diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
index 04f68fce4..fdf7f5bbc 100644
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -192,6 +192,8 @@ class KalturaIE(InfoExtractor):
'entryId': video_id,
'service': 'baseentry',
'ks': '{1:result:ks}',
+ 'responseProfile:fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId',
+ 'responseProfile:type': 1,
},
{
'action': 'getbyentryid',
diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py
index c7f813370..fa217365a 100644
--- a/youtube_dl/extractor/laola1tv.py
+++ b/youtube_dl/extractor/laola1tv.py
@@ -2,6 +2,7 @@
from __future__ import unicode_literals
import json
+import re
from .common import InfoExtractor
from ..utils import (
@@ -32,7 +33,8 @@ class Laola1TvEmbedIE(InfoExtractor):
def _extract_token_url(self, stream_access_url, video_id, data):
return self._download_json(
- stream_access_url, video_id, headers={
+ self._proto_relative_url(stream_access_url, 'https:'), video_id,
+ headers={
'Content-Type': 'application/json',
}, data=json.dumps(data).encode())['data']['stream-access'][0]
@@ -119,9 +121,59 @@ class Laola1TvEmbedIE(InfoExtractor):
}
-class Laola1TvIE(Laola1TvEmbedIE):
+class Laola1TvBaseIE(Laola1TvEmbedIE):
+ def _extract_video(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ if 'Dieser Livestream ist bereits beendet.' in webpage:
+ raise ExtractorError('This live stream has already finished.', expected=True)
+
+ conf = self._parse_json(self._search_regex(
+ r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
+ display_id,
+ transform_source=lambda s: js_to_json(re.sub(r'shareurl:.+,', '', s)))
+ video_id = conf['videoid']
+
+ config = self._download_json(conf['configUrl'], video_id, query={
+ 'videoid': video_id,
+ 'partnerid': conf['partnerid'],
+ 'language': conf.get('language', ''),
+ 'portal': conf.get('portalid', ''),
+ })
+ error = config.get('error')
+ if error:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
+
+ video_data = config['video']
+ title = video_data['title']
+ is_live = video_data.get('isLivestream') and video_data.get('isLive')
+ meta = video_data.get('metaInformation')
+ sports = meta.get('sports')
+ categories = sports.split(',') if sports else []
+
+ token_url = self._extract_token_url(
+ video_data['streamAccess'], video_id,
+ video_data['abo']['required'])
+
+ formats = self._extract_formats(token_url, video_id)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('image'),
+ 'categories': categories,
+ 'formats': formats,
+ 'is_live': is_live,
+ }
+
+
+class Laola1TvIE(Laola1TvBaseIE):
IE_NAME = 'laola1tv'
    _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
+
_TESTS = [{
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
'info_dict': {
@@ -169,52 +221,30 @@ class Laola1TvIE(Laola1TvEmbedIE):
}]
def _real_extract(self, url):
- display_id = self._match_id(url)
+ return self._extract_video(url)
- webpage = self._download_webpage(url, display_id)
- if 'Dieser Livestream ist bereits beendet.' in webpage:
- raise ExtractorError('This live stream has already finished.', expected=True)
+class EHFTVIE(Laola1TvBaseIE):
+ IE_NAME = 'ehftv'
+    _VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(?:-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)'
- conf = self._parse_json(self._search_regex(
- r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
- display_id, js_to_json)
+ _TESTS = [{
+ 'url': 'https://www.ehftv.com/int/video/paris-saint-germain-handball-pge-vive-kielce/1166761',
+ 'info_dict': {
+ 'id': '1166761',
+ 'display_id': 'paris-saint-germain-handball-pge-vive-kielce',
+ 'ext': 'mp4',
+ 'title': 'Paris Saint-Germain Handball - PGE Vive Kielce',
+ 'is_live': False,
+ 'categories': ['Handball'],
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
- video_id = conf['videoid']
-
- config = self._download_json(conf['configUrl'], video_id, query={
- 'videoid': video_id,
- 'partnerid': conf['partnerid'],
- 'language': conf.get('language', ''),
- 'portal': conf.get('portalid', ''),
- })
- error = config.get('error')
- if error:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
-
- video_data = config['video']
- title = video_data['title']
- is_live = video_data.get('isLivestream') and video_data.get('isLive')
- meta = video_data.get('metaInformation')
- sports = meta.get('sports')
- categories = sports.split(',') if sports else []
-
- token_url = self._extract_token_url(
- video_data['streamAccess'], video_id,
- video_data['abo']['required'])
-
- formats = self._extract_formats(token_url, video_id)
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': self._live_title(title) if is_live else title,
- 'description': video_data.get('description'),
- 'thumbnail': video_data.get('image'),
- 'categories': categories,
- 'formats': formats,
- 'is_live': is_live,
- }
+ def _real_extract(self, url):
+ return self._extract_video(url)
class ITTFIE(InfoExtractor):
diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py
index 40295a30b..03f205144 100644
--- a/youtube_dl/extractor/libraryofcongress.py
+++ b/youtube_dl/extractor/libraryofcongress.py
@@ -16,16 +16,15 @@ from ..utils import (
class LibraryOfCongressIE(InfoExtractor):
IE_NAME = 'loc'
IE_DESC = 'Library of Congress'
-    _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9a-z_.]+)'
_TESTS = [{
# embedded via