youtube-dl/youtube_dl/extractor/tunein.py

# coding: utf-8
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import ExtractorError


class TuneInIE(InfoExtractor):
    """Extractor for TuneIn radio stations.

    Handles station pages on tunein.com (``radio/...-s<id>`` and
    ``station/...StationId=<id>``) as well as ``tun.in/<code>`` short links,
    which are resolved by following the HTTP redirect first.
    """

    _VALID_URL = r'''(?x)https?://(?:www\.)?
    (?:
        tunein\.com/
        (?:
            radio/.*?-s|
            station/.*?StationId\=
        )(?P<id>[0-9]+)
        |tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
    )
    '''

    # Expected metadata, shared by the direct and the short-link test below.
    _INFO_DICT = {
        'id': '34682',
        'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
        'ext': 'AAC',
        # Raw string: '\.' is not a valid string escape sequence.
        'thumbnail': r're:^https?://.*\.png$',
        'location': 'Tacoma, WA',
    }
    _TESTS = [
        {
            'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
            'info_dict': _INFO_DICT,
            'params': {
                'skip_download': True,  # live stream
            },
        },
        {  # test redirection
            'url': 'http://tun.in/ser7s',
            'info_dict': _INFO_DICT,
            'params': {
                'skip_download': True,  # live stream
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the station that ``url`` points to.

        Steps: resolve a ``tun.in`` short link if needed, download the
        station page, parse the ``TuneIn.payload`` JSON embedded in it,
        then download and parse the stream list referenced by that payload.

        Raises:
            ExtractorError: if a short link redirects to a URL without a
                station id, or if the payload carries no ``StreamUrl``.
        """
        mobj = re.match(self._VALID_URL, url)
        redirect_id = mobj.group('redirect_id')
        if redirect_id:
            # The server doesn't support HEAD requests
            urlh = self._request_webpage(
                url, redirect_id, note='Downloading redirect page')
            url = urlh.geturl()
            self.to_screen('Following redirect: %s' % url)
            mobj = re.match(self._VALID_URL, url)
            # The redirect target may be something other than a station
            # page (no match at all, or a match without an 'id' group).
            if mobj is None or not mobj.group('id'):
                raise ExtractorError(
                    'Redirect target is not a supported TuneIn station '
                    'URL: %s' % url, expected=True)
        station_id = mobj.group('id')

        webpage = self._download_webpage(
            url, station_id, note='Downloading station webpage')

        # The station metadata is a JSON object assigned to TuneIn.payload.
        payload = self._html_search_regex(
            r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data')
        json_data = json.loads(payload)
        station_info = json_data['Station']['broadcast']
        title = station_info['Title']
        thumbnail = station_info.get('Logo')
        location = station_info.get('Location')
        streams_url = station_info.get('StreamUrl')
        if not streams_url:
            raise ExtractorError('No downloadable streams found',
                                 expected=True)
        stream_data = self._download_webpage(
            streams_url, station_id, note='Downloading stream data')
        # The JSON document is wrapped as "(...);", so unwrap it before
        # parsing.
        streams = json.loads(self._search_regex(
            r'\((.*)\);', stream_data, 'stream info'))['Streams']

        is_live = None
        formats = []
        for stream in streams:
            if stream.get('Type') == 'Live':
                is_live = True
            stream_url = stream.get('Url')
            if not stream_url:
                # A format without a URL cannot be downloaded; skip it.
                continue
            media_type = stream.get('MediaType')
            formats.append({
                'abr': stream.get('Bandwidth'),
                'ext': media_type,
                'acodec': media_type,
                'vcodec': 'none',
                'url': stream_url,
                # Sometimes streams with the highest quality do not exist
                'preference': stream.get('Reliability'),
            })
        self._sort_formats(formats)

        return {
            'id': station_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'location': location,
            'is_live': is_live,
        }
[tunein] Add new extractor (Closes #4097) 2014-11-24 21:15:33 +00:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

			`import json`
			`import re`

			`from .common import InfoExtractor`
			`from ..utils import ExtractorError`


			`class TuneInIE(InfoExtractor):`
			`_VALID_URL = r'''(?x)https?://(?:www\.)?`
			`(?:`
			`tunein\.com/`
			`(?:`
			`radio/.*?-s|`
			`station/.*?StationId\=`
			`)(?P<id>[0-9]+)`
			`|tun\.in/(?P<redirect_id>[A-Za-z0-9]+)`
			`)`
			`'''`

			`_INFO_DICT = {`
			`'id': '34682',`
			`'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',`
			`'ext': 'AAC',`
			`'thumbnail': 're:^https?://.*\.png$',`
			`'location': 'Tacoma, WA',`
			`}`
			`_TESTS = [`
			`{`
			`'url': 'http://tunein.com/radio/Jazz24-885-s34682/',`
			`'info_dict': _INFO_DICT,`
			`'params': {`
			`'skip_download': True, # live stream`
			`},`
			`},`
			`{ # test redirection`
			`'url': 'http://tun.in/ser7s',`
			`'info_dict': _INFO_DICT,`
			`'params': {`
			`'skip_download': True, # live stream`
			`},`
			`},`
			`]`

			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`redirect_id = mobj.group('redirect_id')`
			`if redirect_id:`
			`# The server doesn't support HEAD requests`
			`urlh = self._request_webpage(`
			`url, redirect_id, note='Downloading redirect page')`
			`url = urlh.geturl()`
			`self.to_screen('Following redirect: %s' % url)`
			`mobj = re.match(self._VALID_URL, url)`
			`station_id = mobj.group('id')`

			`webpage = self._download_webpage(`
			`url, station_id, note='Downloading station webpage')`

			`payload = self._html_search_regex(`
			`r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data')`
			`json_data = json.loads(payload)`
			`station_info = json_data['Station']['broadcast']`
			`title = station_info['Title']`
			`thumbnail = station_info.get('Logo')`
			`location = station_info.get('Location')`
			`streams_url = station_info.get('StreamUrl')`
			`if not streams_url:`
			`raise ExtractorError('No downloadable streams found',`
			`expected=True)`
			`stream_data = self._download_webpage(`
			`streams_url, station_id, note='Downloading stream data')`
			`streams = json.loads(self._search_regex(`
			`r'\((.*)\);', stream_data, 'stream info'))['Streams']`

			`is_live = None`
			`formats = []`
			`for stream in streams:`
			`if stream.get('Type') == 'Live':`
			`is_live = True`
			`formats.append({`
			`'abr': stream.get('Bandwidth'),`
			`'ext': stream.get('MediaType'),`
			`'acodec': stream.get('MediaType'),`
			`'vcodec': 'none',`
			`'url': stream.get('Url'),`
			`# Sometimes streams with the highest quality do not exist`
			`'preference': stream.get('Reliability'),`
			`})`
			`self._sort_formats(formats)`

			`return {`
			`'id': station_id,`
			`'title': title,`
			`'formats': formats,`
			`'thumbnail': thumbnail,`
			`'location': location,`
			`'is_live': is_live,`
			`}`