From aaf44a2f47f013e8d864ac9f98b2833904a8be78 Mon Sep 17 00:00:00 2001
From: Remita Amine
Date: Sat, 13 Aug 2016 22:53:07 +0100
Subject: [PATCH] [uplynk] Add new extractor

---
 youtube_dl/downloader/hls.py       |  6 +++
 youtube_dl/extractor/extractors.py |  4 ++
 youtube_dl/extractor/uplynk.py     | 72 ++++++++++++++++++++++++++++++
 3 files changed, 82 insertions(+)
 create mode 100644 youtube_dl/extractor/uplynk.py

diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index 3b7bb3508..8d7971e5d 100644
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -20,6 +20,7 @@ from ..utils import (
     encodeFilename,
     sanitize_open,
     parse_m3u8_attributes,
+    update_url_query,
 )
 
 
@@ -82,6 +83,7 @@ class HlsFD(FragmentFD):
 
         self._prepare_and_start_frag_download(ctx)
 
+        extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
         i = 0
         media_sequence = 0
         decrypt_info = {'METHOD': 'NONE'}
@@ -95,6 +97,8 @@ class HlsFD(FragmentFD):
                         if re.match(r'^https?://', line)
                         else compat_urlparse.urljoin(man_url, line))
                     frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
+                    if extra_param_to_segment_url:
+                        frag_url = update_url_query(frag_url, extra_param_to_segment_url)
                     success = ctx['dl'].download(frag_filename, {'url': frag_url})
                     if not success:
                         return False
@@ -120,6 +124,8 @@ class HlsFD(FragmentFD):
                         if not re.match(r'^https?://', decrypt_info['URI']):
                             decrypt_info['URI'] = compat_urlparse.urljoin(
                                 man_url, decrypt_info['URI'])
+                        if extra_param_to_segment_url:
+                            decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_param_to_segment_url)
                         decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
                 elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
                     media_sequence = int(line[22:])
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 82d4ed153..901847509 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -926,6 +926,10 @@ from .udn import UDNEmbedIE
 from .digiteka import DigitekaIE
 from .unistra import UnistraIE
 from .uol import UOLIE
+from .uplynk import (
+    UplynkIE,
+    UplynkPreplayIE,
+)
 from .urort import UrortIE
 from .urplay import URPlayIE
 from .usatoday import USATodayIE
diff --git a/youtube_dl/extractor/uplynk.py b/youtube_dl/extractor/uplynk.py
new file mode 100644
index 000000000..a6a685c9d
--- /dev/null
+++ b/youtube_dl/extractor/uplynk.py
@@ -0,0 +1,72 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    ExtractorError,
+)
+
+
+class UplynkIE(InfoExtractor):
+    # Matches <32 hex id>.m3u8/.json and ext/<32 hex>/<external_id> asset URLs;
+    # an optional pbs query value is captured as the session id.
+    _VALID_URL = r'https?://.*?\.uplynk\.com/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.(?:m3u8|json)(?:.*?\bpbs=(?P<session_id>[^&]+))?'
+    _TEST = {
+        'url': 'http://content.uplynk.com/e89eaf2ce9054aa89d92ddb2d817a52e.m3u8',
+        'info_dict': {
+            'id': 'e89eaf2ce9054aa89d92ddb2d817a52e',
+            'ext': 'mp4',
+            'title': '030816-kgo-530pm-solar-eclipse-vid_web.mp4',
+            'uploader_id': '4413701bf5a1488db55b767f8ae9d4fa',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        # groups() come back in _VALID_URL order: path, external_id, id, session_id
+        path, external_id, video_id, session_id = re.match(self._VALID_URL, url).groups()
+        display_id = video_id or external_id
+        formats = self._extract_m3u8_formats('http://content.uplynk.com/%s.m3u8' % path, display_id, 'mp4')
+        if session_id:
+            # HlsFD reads extra_param_to_segment_url and adds it to each
+            # fragment and key URL, so the pbs value travels with every request.
+            for f in formats:
+                f['extra_param_to_segment_url'] = {
+                    'pbs': session_id,
+                }
+        self._sort_formats(formats)
+        asset = self._download_json('http://content.uplynk.com/player/assetinfo/%s.json' % path, display_id)
+        if asset.get('error') == 1:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, asset['msg']), expected=True)
+
+        return {
+            'id': asset['asset'],
+            'title': asset['desc'],
+            'thumbnail': asset.get('default_poster_url'),
+            'duration': float_or_none(asset.get('duration')),
+            'uploader_id': asset.get('owner'),
+            'formats': formats,
+        }
+
+
+class UplynkPreplayIE(InfoExtractor):
+    # Resolves preplay/preplay2 JSON URLs to the matching content.uplynk.com
+    # playlist URL and passes it on to the 'Uplynk' extractor.
+    _VALID_URL = r'https?://.*?\.uplynk\.com/preplay2?/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.json'
+
+    def _real_extract(self, url):
+        path, external_id, video_id = re.match(self._VALID_URL, url).groups()
+        display_id = video_id or external_id
+        preplay = self._download_json(url, display_id)
+        content_url = 'http://content.uplynk.com/%s.m3u8' % path
+        session_id = preplay.get('sid')
+        if session_id:
+            # UplynkIE's _VALID_URL captures the session id from the pbs parameter
+            content_url += '?pbs=' + session_id
+        return self.url_result(content_url, 'Uplynk')