# youtube-dl/youtube_dl/extractor/loom.py

from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor

from ..compat import (
    compat_urllib_parse_unquote,
    compat_urllib_parse_urlencode,
    compat_urllib_request
)
from ..utils import (
    ExtractorError,
    js_to_json,
    try_get,
    unified_timestamp,
    url_or_none
)


class LoomBaseInfoIE(InfoExtractor):
    """Common base class for the Loom extractors defined in this module."""

    # Site root, including the trailing slash; the extractors build API,
    # share-page and thumbnail URLs by appending a relative path to it.
    _BASE_URL = 'https://www.loom.com/'


class LoomIE(LoomBaseInfoIE):
    """Extractor for a single shared Loom recording (``/share/<id>`` URLs)."""

    _VALID_URL = r'https?://(?:www\.)?loom\.com/share/(?!folder)(?P<id>[a-zA-Z0-9]+)'
    _TESTS = [
        {
            'url': 'https://www.loom.com/share/31b41727a5b24dacb6c1417a565b2ebf',
            'md5': '8b94361aabff2075141dc60bd6d35453',
            'info_dict': {
                'id': '31b41727a5b24dacb6c1417a565b2ebf',
                'ext': 'mp4',
                'title': 'How to resize your camera bubble',
                'uploader': 'Allie Hitchcock',
                'upload_date': '20201007',
                'timestamp': 1602089241
            }
        },
        {
            'url': 'https://www.loom.com/share/7e5168ec3b0744cab5e08a340cc7e086',
            'md5': '47dd14aa1d8054c249b68ca57ad9963f',
            'info_dict': {
                'id': '7e5168ec3b0744cab5e08a340cc7e086',
                'ext': 'mp4',
                'title': 'How to flip your camera ',
                'uploader': 'Matthew Flores',
                'upload_date': '20200423',
                'timestamp': 1587646164
            }
        },
        {
            'url': 'https://www.loom.com/share/6670e3eba3c84dc09ada8306c7138075',
            'md5': 'bfad8181ed49d6252b10dfdeb46c535e',
            'info_dict': {
                'id': '6670e3eba3c84dc09ada8306c7138075',
                'ext': 'mp4',
                'title': 'How to record your first video on Loom',
                'uploader': 'Allie Hitchcock',
                'upload_date': '20201118',
                'timestamp': 1605729404
            }
        }
    ]

    def _extract_video_info_json(self, webpage, video_id):
        """Return the ``window.loomSSRVideo`` object embedded in the page.

        The raw match is used (no HTML clean-up) because the payload is
        JavaScript, not markup; it is converted with ``js_to_json`` before
        being parsed. ``video_id`` is only used for error reporting.
        """
        # NOTE(review): the non-greedy match stops at the first ';', so a
        # ';' inside one of the object's string values would truncate it.
        info = self._search_regex(
            r'window\.loomSSRVideo\s*=\s*(.+?);',
            webpage,
            'info')
        return self._parse_json(info, video_id, transform_source=js_to_json)

    def _get_url_by_id_type(self, video_id, type):
        """Ask the sessions API for one stream variant of ``video_id``.

        ``type`` is the last path component of the endpoint (this class
        uses 'transcoded-url' and 'raw-url'). Returns a
        ``(url, part_credentials)`` tuple; both items are None when the
        request fails or the response is not a JSON object.
        """
        # The endpoint is queried with POST; an empty bytes body triggers
        # that without handing urllib a non-bytes payload.
        json_doc = self._download_json(
            self._BASE_URL + 'api/campaigns/sessions/' + video_id + '/' + type,
            video_id, note='Downloading %s JSON' % type,
            fatal=False, data=b'')
        if not isinstance(json_doc, dict):
            return (None, None)
        return (url_or_none(json_doc.get('url')), json_doc.get('part_credentials'))

    def _get_m3u8_formats(self, url, video_id, credentials):
        """Extract HLS formats from ``url`` and attach ``credentials``.

        ``credentials`` is an already url-encoded query string. When it is
        non-empty it is appended to every format URL and also stored as
        ``extra_param_to_segment_url``.
        """
        format_list = self._extract_m3u8_formats(
            url, video_id, 'mp4', entry_protocol='m3u8_native')
        for item in format_list:
            if credentials:
                # Respect a query string that is already part of the URL.
                separator = '&' if '?' in item['url'] else '?'
                item['url'] += separator + credentials
                item['extra_param_to_segment_url'] = credentials
            # 'or 0' also covers formats whose height is present but None.
            item['format_id'] = 'hls-%s' % (item.get('height') or 0)
        return format_list

    def _real_extract(self, url):
        """Build the info dict for the recording addressed by ``url``."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        info = self._extract_video_info_json(webpage, video_id)

        # The page metadata carries one set of dimensions; it is applied to
        # every directly downloadable (non-HLS) variant.
        width = try_get(info, lambda x: x['video_properties']['width'])
        height = try_get(info, lambda x: x['video_properties']['height'])

        formats = []
        for format_type in ('transcoded-url', 'raw-url'):
            format_url, part_credentials = self._get_url_by_id_type(
                video_id, format_type)
            if not format_url:
                # Variant unavailable; the other one may still work.
                continue
            ext = self._search_regex(
                r'\.([a-zA-Z0-9]+)(?:[?#]|$)',
                format_url, 'ext', default=None)
            if ext != 'm3u8':
                formats.append({
                    'url': format_url,
                    'ext': ext,
                    'format_id': format_type,
                    'width': width,
                    'height': height
                })
                continue

            credentials = compat_urllib_parse_urlencode(part_credentials or {})
            m3u8_formats = self._get_m3u8_formats(
                format_url, video_id, credentials)
            # Placement kept from the previous implementation: every HLS
            # variant but the last is inserted just before the current final
            # entry and the last variant is appended.
            # NOTE(review): presumably this hand-orders formats because
            # _sort_formats() is never called - confirm the intended ranking.
            for hls_format in m3u8_formats[:-1]:
                formats.insert(-1, hls_format)
            formats.extend(m3u8_formats[-1:])

        thumbnails = [
            {
                'id': key,
                'url': url_or_none(self._BASE_URL + value)
            }
            for key, value in (try_get(info, lambda x: x['thumbnails'], dict) or {}).items()
            if value
        ]

        return {
            'id': info.get('id') or video_id,
            'title': info.get('name'),
            'formats': formats,
            'thumbnails': thumbnails,
            'description': info.get('description'),
            'uploader': info.get('owner_full_name'),
            'timestamp': unified_timestamp(info.get('createdAt'))
        }


class LoomFolderIE(LoomBaseInfoIE):
    """Playlist extractor for shared Loom folders (``/share/folder/...``).

    The URL path after ``folder/`` is a folder id, optionally followed by
    url-encoded sub-folder names.
    """

    # The id group stops before a trailing slash, query string or fragment
    # so that only real path segments reach _get_real_folder_id().
    _VALID_URL = r'https?://(?:www\.)?loom\.com/share/folder/(?P<id>[^/?#]+(?:/[^/?#]+)*)'
    _TESTS = [
        {
            'url': 'https://www.loom.com/share/folder/997db4db046f43e5912f10dc5f817b5c/List%20B-%20e%2C%20u',
            'info_dict': {
                'id': 'b14bf2c5ef434bca8ab3585b0c1e97d9',
                'title': 'List B- e, u'
            },
            'playlist_mincount': 4
        },
        {
            'url': 'https://www.loom.com/share/folder/997db4db046f43e5912f10dc5f817b5c',
            'info_dict': {
                'id': '997db4db046f43e5912f10dc5f817b5c',
                'title': 'Blending Lessons '
            },
            'playlist_mincount': 16
        }
    ]

    def _get_real_folder_id(self, path):
        """Resolve the URL path below ``folder/`` to a folder id.

        ``path`` is ``<folder id>`` or ``<folder id>/<name>[/<name>...]``.
        Without sub-folder names the leading id is returned unchanged;
        otherwise the id is looked up through ``v1/folders/by_name``.

        Raises ExtractorError when the lookup response has no folder id.
        """
        parent_folder_id, _, subpath = path.partition('/')
        # Split before unquoting so an encoded '/' inside a name survives.
        # NOTE(review): assumes the API wants one list item per path
        # segment (the key is plural) - confirm against a nested folder.
        folder_names = [
            compat_urllib_parse_unquote(name)
            for name in subpath.split('/') if name]
        if not folder_names:
            return parent_folder_id

        # Fetch folder id
        json_doc = self._download_json(
            self._BASE_URL + 'v1/folders/by_name', parent_folder_id,
            note='Resolving sub-folder id',
            data=json.dumps({
                'folder_names': folder_names,
                'parent_folder_id': parent_folder_id
            }).encode('utf-8'))

        folder_id = try_get(json_doc, lambda x: x['current_folder']['id'])
        if not folder_id:
            raise ExtractorError(
                'Unable to resolve folder %s' % '/'.join(folder_names),
                expected=True)
        return folder_id

    def _get_folder_info(self, folder_id):
        """Collect metadata and video ids of a folder, sub-folders included.

        Returns a dict with 'id', 'title', 'description' and 'entries';
        'entries' lists video ids, those of sub-folders first.
        """
        json_doc = self._download_json(
            self._BASE_URL + 'v1/folders/' + folder_id, folder_id)
        videos = []

        # Recursive call for subfolder
        for folder in json_doc.get('folders') or []:
            subfolder_id = folder.get('id')
            if not subfolder_id:
                continue
            videos.extend(self._get_folder_info(subfolder_id)['entries'])
        videos.extend(
            video['id'] for video in json_doc.get('videos') or []
            if video.get('id'))

        return {
            'id': folder_id,
            'title': json_doc.get('name'),
            'description': json_doc.get('description'),
            'entries': videos
        }

    def _real_extract(self, url):
        """Return a playlist whose entries point at the folder's videos."""
        folder_id = self._get_real_folder_id(self._match_id(url))
        folder_info = self._get_folder_info(folder_id)

        # Hand each video to LoomIE through a url result instead of calling
        # its _real_extract() directly, so every entry goes through the
        # regular extraction pipeline and is only resolved when needed.
        entries = [
            self.url_result(
                self._BASE_URL + 'share/' + video_id,
                ie=LoomIE.ie_key(), video_id=video_id)
            for video_id in folder_info['entries']]

        return self.playlist_result(
            entries, folder_info['id'],
            folder_info.get('title'), folder_info.get('description'))
[Loom] Add new extractor 2021-02-01 08:00:24 +00:00			`from __future__ import unicode_literals`

[Loom] Add: Additional playlist extractor for folder support 2021-02-03 16:18:10 +00:00			`import json`
			`import re`

[Loom] Add new extractor 2021-02-01 08:00:24 +00:00			`from .common import InfoExtractor`

			`from ..compat import (`
[Loom] Add: Additional playlist extractor for folder support 2021-02-03 16:18:10 +00:00			`compat_urllib_parse_unquote,`
[Loom] Add new extractor 2021-02-01 08:00:24 +00:00			`compat_urllib_parse_urlencode,`
			`compat_urllib_request`
			`)`
			`from ..utils import (`
			`js_to_json,`
			`try_get,`
			`unified_timestamp,`
			`url_or_none`
			`)`


			`class LoomBaseInfoIE(InfoExtractor):`
			`_BASE_URL = 'https://www.loom.com/'`


			`class LoomIE(LoomBaseInfoIE):`
[Loom] Add: Additional playlist extractor for folder support 2021-02-03 16:18:10 +00:00			`_VALID_URL = r'https?://(?:www\.)?loom\.com/share/(?!folder)(?P<id>[a-zA-Z0-9]+)'`
[Loom] Add new extractor 2021-02-01 08:00:24 +00:00			`_TESTS = [`
			`{`
			`'url': 'https://www.loom.com/share/31b41727a5b24dacb6c1417a565b2ebf',`
			`'md5': '8b94361aabff2075141dc60bd6d35453',`
			`'info_dict': {`
			`'id': '31b41727a5b24dacb6c1417a565b2ebf',`
			`'ext': 'mp4',`
			`'title': 'How to resize your camera bubble',`
			`'uploader': 'Allie Hitchcock',`
			`'upload_date': '20201007',`
			`'timestamp': 1602089241`
			`}`
			`},`
			`{`
			`'url': 'https://www.loom.com/share/7e5168ec3b0744cab5e08a340cc7e086',`
			`'md5': '47dd14aa1d8054c249b68ca57ad9963f',`
			`'info_dict': {`
			`'id': '7e5168ec3b0744cab5e08a340cc7e086',`
			`'ext': 'mp4',`
			`'title': 'How to flip your camera ',`
			`'uploader': 'Matthew Flores',`
			`'upload_date': '20200423',`
			`'timestamp': 1587646164`
			`}`
			`},`
			`{`
			`'url': 'https://www.loom.com/share/6670e3eba3c84dc09ada8306c7138075',`
			`'md5': 'bfad8181ed49d6252b10dfdeb46c535e',`
			`'info_dict': {`
			`'id': '6670e3eba3c84dc09ada8306c7138075',`
			`'ext': 'mp4',`
			`'title': 'How to record your first video on Loom',`
			`'uploader': 'Allie Hitchcock',`
			`'upload_date': '20201118',`
			`'timestamp': 1605729404`
			`}`
			`}`
			`]`

[Loom] Update: Move related member functions into LoomIE 2021-02-03 16:06:40 +00:00			`def _extract_video_info_json(self, webpage, video_id):`
			`info = self._html_search_regex(`
			`r'window.loomSSRVideo = (.+?);',`
			`webpage,`
			`'info')`
			`return self._parse_json(info, 'json', js_to_json)`

			`def _get_url_by_id_type(self, video_id, type):`
			`request = compat_urllib_request.Request(`
			`self._BASE_URL + 'api/campaigns/sessions/' + video_id + '/' + type,`
			`{})`
[Loom] Add: Additional playlist extractor for folder support 2021-02-03 16:18:10 +00:00			`json_doc = self._download_json(request, video_id)`
			`return (url_or_none(json_doc.get('url')), json_doc.get('part_credentials'))`
[Loom] Update: Move related member functions into LoomIE 2021-02-03 16:06:40 +00:00
			`def _get_m3u8_formats(self, url, video_id, credentials):`
			`format_list = self._extract_m3u8_formats(url, video_id)`
			`for item in format_list:`
			`item['protocol'] = 'm3u8_native'`
			`item['url'] += '?' + credentials`
			`item['ext'] = 'mp4'`
			`item['format_id'] = 'hls-' + str(item.get('height', 0))`
			`item['extra_param_to_segment_url'] = credentials`
			`return format_list`

[Loom] Add new extractor 2021-02-01 08:00:24 +00:00			`def _real_extract(self, url):`
			`video_id = self._match_id(url)`
			`webpage = self._download_webpage(url, video_id)`

			`info = self._extract_video_info_json(webpage, video_id)`

			`formats = []`
			`for type in ['transcoded-url', 'raw-url']:`
			`(url, part_credentials) = self._get_url_by_id_type(video_id, type)`
			`ext = self._search_regex(`
			`r'\.([a-zA-Z0-9]+)\?',`
			`url, 'ext', default=None)`
			`if(ext != 'm3u8'):`
			`formats.append({`
			`'url': url,`
			`'ext': ext,`
			`'format_id': type,`
			`'width': try_get(info, lambda x: x['video_properties']['width']),`
			`'height': try_get(info, lambda x: x['video_properties']['height'])`
			`})`
			`else:`
			`credentials = compat_urllib_parse_urlencode(part_credentials)`
			`m3u8_formats = self._get_m3u8_formats(url, video_id, credentials)`
			`for i in range(len(m3u8_formats)):`
			`formats.insert(`
			`(-1, len(formats))[i == len(m3u8_formats) - 1],`
			`m3u8_formats[i])`

			`return {`
			`'id': info.get('id'),`
			`'title': info.get('name'),`
			`'formats': formats,`
			`'thumbnails': [`
			`{`
			`'id': key,`
			`'url': url_or_none(self._BASE_URL + value)`
			`} for key, value in info.get('thumbnails').items()`
			`],`
			`'description': info.get('description'),`
			`'uploader': info.get('owner_full_name'),`
			`'timestamp': unified_timestamp(info.get('createdAt'))`
			`}`
[Loom] Add: Additional playlist extractor for folder support 2021-02-03 16:18:10 +00:00

			`class LoomFolderIE(LoomBaseInfoIE):`
			`_VALID_URL = r'https?://(?:www\.)?loom\.com/share/folder/(?P<id>.+)/?'`
			`_TESTS = [`
			`{`
			`'url': 'https://www.loom.com/share/folder/997db4db046f43e5912f10dc5f817b5c/List%20B-%20e%2C%20u',`
			`'info_dict': {`
			`'id': 'b14bf2c5ef434bca8ab3585b0c1e97d9',`
			`'title': 'List B- e, u'`
			`},`
			`'playlist_mincount': 4`
			`},`
			`{`
			`'url': 'https://www.loom.com/share/folder/997db4db046f43e5912f10dc5f817b5c',`
			`'info_dict': {`
			`'id': '997db4db046f43e5912f10dc5f817b5c',`
			`'title': 'Blending Lessons '`
			`},`
			`'playlist_mincount': 16`
			`}`
			`]`

			`def _get_real_folder_id(self, path):`
			`subfolders = re.match(`
			`r'^([a-zA-Z0-9]+)(?:\/(.+))*$',`
			`compat_urllib_parse_unquote(path))`
			`folder_names = subfolders.groups()[1:]`
			`parent_folder_id = subfolders.group(1)`
			`if(folder_names[0] is None):`
			`return path`

			`# Fetch folder id`
			`request = compat_urllib_request.Request(`
			`self._BASE_URL + 'v1/folders/by_name',`
			`json.dumps({`
			`'folder_names': folder_names,`
			`'parent_folder_id': parent_folder_id`
			`}).encode('utf-8'))`
			`json_doc = self._download_json(request, parent_folder_id)`

			`return try_get(json_doc, lambda x: x['current_folder']['id'])`

			`def _get_folder_info(self, folder_id):`
			`json_doc = self._download_json(url_or_none(self._BASE_URL + 'v1/folders/' + folder_id), folder_id)`
			`videos = []`

			`# Recursive call for subfolder`
			`for folder in json_doc.get('folders'):`
			`subfolder_info = self._get_folder_info(folder.get('id'))`
			`videos.extend(subfolder_info.get('entries'))`
			`videos.extend([val.get('id') for val in json_doc.get('videos')])`

			`return {`
			`'id': folder_id,`
			`'title': json_doc.get('name'),`
			`'description': json_doc.get('description'),`
			`'entries': videos`
			`}`

			`def _real_extract(self, url):`
			`folder_id = self._match_id(url)`
			`folder_id = self._get_real_folder_id(folder_id)`
			`folder_info = self._get_folder_info(folder_id)`
			`folder_info['_type'] = 'playlist'`

			`for i in range(len(folder_info['entries'])):`
			`video_id = folder_info['entries'][i]`
			`folder_info['entries'][i] = LoomIE(self._downloader)._real_extract(url_or_none(self._BASE_URL + 'share/' + video_id))`

			`return folder_info`