1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-16 06:17:27 +00:00

[doodstream] fix and add more metadata

* metadata
      * fetch and decrypt video title
          * implemented doodExe
      * add filesize and duration
      * removed description
      * use _og_search_thumbnail
  * remove User-Agent from HTTP headers because it is not needed
This commit is contained in:
schnusch 2021-04-24 22:10:05 +02:00
parent 7496a0401c
commit 847a6464a8

View File

@ -7,10 +7,28 @@ import time
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
js_to_json,
urljoin, urljoin,
) )
def doodExe(crp, crs):
    """Decode a string scrambled with a character-substitution scheme.

    Each character of ``crs`` is looked up in ``crp``; the character found
    at the same index of the *sorted* ``crp`` is emitted instead.  Characters
    of ``crs`` that do not occur in ``crp`` are dropped.  Afterwards a fixed
    set of ``+``-delimited escape tokens is expanded to brackets and spaces.

    :param crp: substitution alphabet (presumably the cipher key supplied by
        the site -- confirm against the caller).  The literal value
        ``'N_crp'`` means ``crs`` is returned unchanged.
    :param crs: the scrambled text to decode.
    :returns: the decoded string.
    """
    if crp == 'N_crp':
        # Sentinel value: nothing to decode, hand the payload back untouched.
        return crs

    # Build the substitution table once instead of scanning ``crp`` for every
    # input character.  ``setdefault`` keeps the mapping of the FIRST
    # occurrence of a repeated character, which is what ``str.find`` yields.
    table = {}
    for scrambled, plain in zip(crp, sorted(crp)):
        table.setdefault(scrambled, plain)

    # Characters missing from the table are silently skipped.
    result = ''.join(table[c] for c in crs if c in table)

    # Order matters: the multi-character tokens must be expanded before the
    # bare '+' (which stands for a space) is replaced.
    for token, replacement in (
        ('+.+', '('),
        ('+..+', ')'),
        ('+-+', '['),
        ('+--+', ']'),
        ('+', ' '),
    ):
        result = result.replace(token, replacement)
    return result
class DoodStreamIE(InfoExtractor): class DoodStreamIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:doodstream\.com|dood\.(?:so|to|watch))/[de]/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?(?:doodstream\.com|dood\.(?:so|to|watch))/[de]/(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
@ -21,6 +39,8 @@ class DoodStreamIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Big Buck Bunny Trailer', 'title': 'Big Buck Bunny Trailer',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'filesize': 4447915,
'duration': 33,
}, },
}] }]
@ -28,32 +48,44 @@ class DoodStreamIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
url = urljoin(url, '/e/' + video_id) url = urljoin(url, '/e/' + video_id)
referer = {'Referer': url}
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_meta(['og:title', 'twitter:title'], metadata_url = self._html_search_regex(r"('/cptr/[^']*')", webpage,
webpage, default=None) 'video metadata')
thumb = self._html_search_meta(['og:image', 'twitter:image'], metadata_url = self._parse_json(metadata_url, video_id,
webpage, default=None) transform_source=js_to_json)
metadata_url = urljoin(url, metadata_url)
metadata = self._download_json(metadata_url, video_id, headers=referer)
thumb = self._og_search_thumbnail(webpage)
try:
filesize = int(doodExe(**metadata['siz']), 10)
except (KeyError, ValueError):
filesize = None
try:
duration = int(doodExe(**metadata['len']), 10)
except (KeyError, ValueError):
duration = None
try:
title = doodExe(**metadata['ttl'])
except KeyError:
title = video_id
token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token') token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token')
description = self._html_search_meta(
['og:description', 'description', 'twitter:description'],
webpage, default=None)
auth_url = 'https://dood.to' + self._html_search_regex( auth_url = 'https://dood.to' + self._html_search_regex(
r'(/pass_md5.*?)\'', webpage, 'pass_md5') r'(/pass_md5.*?)\'', webpage, 'pass_md5')
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0',
'referer': url
}
webpage = self._download_webpage(auth_url, video_id, headers=headers) webpage = self._download_webpage(auth_url, video_id, headers=referer)
final_url = webpage + ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(10)) + "?token=" + token + "&expiry=" + str(int(time.time() * 1000)) final_url = webpage + ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(10)) + "?token=" + token + "&expiry=" + str(int(time.time() * 1000))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'url': final_url, 'url': final_url,
'http_headers': headers, 'http_headers': referer,
'ext': 'mp4', 'ext': 'mp4',
'description': description,
'thumbnail': thumb, 'thumbnail': thumb,
'filesize': filesize,
'duration': duration,
} }