Merge 5b088ccdd9 into c5098961b0

Fixed linter warnings.
Fixed flake8 errors.
2024-12-22 16:06:49 +00:00 · 2024-08-09 17:31:19 -06:00 · 2024-07-03 14:31:36 +03:00 · 2024-06-24 23:12:03 +03:00 · 2024-06-24 22:11:06 +03:00 · 2024-06-24 17:40:43 +03:00
2 changed files with 77 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -560,6 +560,7 @@ from .jwplatform import JWPlatformIE
 from .kakao import KakaoIE
 from .kaltura import KalturaIE
 from .kankan import KankanIE
+from .kankids import KanKidsIE
 from .karaoketv import KaraoketvIE
 from .karrierevideos import KarriereVideosIE
 from .keezmovies import KeezMoviesIE
--- a/youtube_dl/extractor/kankids.py
+++ b/youtube_dl/extractor/kankids.py
@@ -0,0 +1,76 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+import re
+
+# Path prefix shared by every URL this extractor matches and every episode
+# link it looks for in the page.
+CONTENT_DIR = r'/content/kids/'
+# Host name of the site; its dots are escaped with str.replace() where it is
+# embedded in the URL-matching regex.
+DOMAIN = r'kankids.org.il'
+
+
+class KanKidsIE(InfoExtractor):
+    """Playlist extractor for series and season pages on kankids.org.il.
+
+    Matches ``/content/kids/<category>-main/<series id>/[<season>/]`` URLs,
+    collects the episode links found in the page and returns them as a
+    playlist whose entries are handed to the generic extractor.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?' +\
+        DOMAIN.replace('.', '\\.') + CONTENT_DIR +\
+        r'(?P<category>[a-z]+)-main/(?P<id>[\w\-0-9]+)/(?P<season>\w+)?/?$'
+    _TESTS = [
+        {
+            'url': 'https://www.kankids.org.il/content/kids/ktantanim-main/p-11732/',
+            'info_dict': {
+                '_type': 'playlist',
+                'id': 'p-11732',
+                'title': 'בית ספר לקוסמים',
+            },
+            'playlist_count': 60,
+        },
+        {
+            'url': 'https://www.kankids.org.il/content/kids/hinuchit-main/cramel_main/s1/',
+            'info_dict': {
+                '_type': 'playlist',
+                'id': 'cramel_main',
+                'title': 'כראמל - עונה 1',
+            },
+            'playlist_count': 21,
+        },
+    ]
+
+    def _real_extract(self, url):
+        """Build the playlist for the series (or single season) at ``url``.
+
+        Returns a ``playlist`` info dict with the series id, the page title
+        (``None`` when no title could be found) and one ``url_result`` entry
+        per distinct episode link, in the order the links appear in the page.
+        """
+        # Call through ``self``: zero-argument ``super()`` raises TypeError on
+        # Python 2, and this class does not override the method anyway.
+        m = self._match_valid_url(url)
+        series_id = m.group('id')
+        category = m.group('category')
+        playlist_season = m.group('season')
+
+        webpage = self._download_webpage(url, series_id)
+
+        # Take the <title> text up to a " |" separator; only when the page has
+        # no such separator fall back to " -".
+        title_match = (
+            re.search(r'<title>(?P<title>.+) \|', webpage)
+            or re.search(r'<title>(?P<title>.+) -', webpage))
+        series_title = title_match.group('title') if title_match else None
+
+        # With a season in the URL only that season's links are matched and
+        # findall() yields (id, title); otherwise the season is captured too
+        # and findall() yields (season, id, title).
+        season_re = playlist_season if playlist_season else r'(?P<season>\w+)'
+        series_path = CONTENT_DIR + category + r'-main/' + series_id + r'/'
+        episode_re = (
+            r'href="' + series_path         # Content dir + series
+            + season_re + r'/'              # Season
+            + r'(?P<id>[0-9]+)/"'           # Episode
+            + r'.+title="(?P<title>.+)"')   # Title
+
+        # Rebuild each episode URL from the same path the href matched, so
+        # the series id is kept in the URL.
+        series_url = r'https://www.' + DOMAIN + series_path
+        entries = []
+        seen = set()
+        for episode in re.findall(episode_re, webpage):
+            # Drop repeated links while keeping the page order.
+            if episode in seen:
+                continue
+            seen.add(episode)
+            if playlist_season:
+                episode = (playlist_season,) + episode
+            season, video_id, title = episode
+            entries.append(self.url_result(
+                series_url + season + r'/' + video_id + r'/',
+                ie='Generic',
+                video_id=video_id,
+                video_title=title,
+            ))
+
+        return {
+            '_type': 'playlist',
+            'id': series_id,
+            'title': series_title,
+            'entries': entries,
+        }
Author	SHA1	Message	Date
Pavel	86bd8f6938	Merge `5b088ccdd9` into `c5098961b0`	2024-08-09 17:31:19 -06:00
deepspy	5b088ccdd9	Fixed linter warnings.	2024-07-03 14:31:36 +03:00
deepspy	a4737bb755	Fixed flake8 errors.	2024-06-24 23:12:03 +03:00
deepspy	d3e980eaa5	Some cleanup of the kan kids extractor.	2024-06-24 22:11:06 +03:00
deepspy	162eb5632b	Added unittests for kankids extractor. Fixed a non p- playlist id matching bug.	2024-06-24 17:40:43 +03:00
deepspy	c9265f6d60	Fixed a series title location bug.	2024-06-24 16:46:29 +03:00
deepspy	3fb423c0bf	First working version of the kan kids extractor.	2024-06-24 16:25:34 +03:00
deepspy	d335e0beec	Extracted episode list.	2024-06-24 14:22:56 +03:00
deepspy	833fe8c9af	Added a basic shell for a kankids extractor.	2024-06-24 00:10:22 +03:00