From bd65f181532ab4e535b408d3ccf99723534eb326 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Sat, 24 Jun 2017 18:33:31 +0700
Subject: [PATCH] [onetpl] Add support for videos embedded via pulsembed (closes #13482)

---
 youtube_dl/extractor/onet.py | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py
index 94f57990b..58da1bc27 100644
--- a/youtube_dl/extractor/onet.py
+++ b/youtube_dl/extractor/onet.py
@@ -11,6 +11,7 @@ from ..utils import (
     get_element_by_class,
     int_or_none,
     js_to_json,
+    NO_DEFAULT,
     parse_iso8601,
     remove_start,
     strip_or_none,
@@ -198,6 +199,19 @@ class OnetPlIE(InfoExtractor):
             'upload_date': '20170214',
             'timestamp': 1487078046,
         },
+    }, {
+        # embedded via pulsembed
+        'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0',
+        'info_dict': {
+            'id': '501235.965429946',
+            'ext': 'mp4',
+            'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu',
+            'upload_date': '20170622',
+            'timestamp': 1498159955,
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
         'only_matching': True,
@@ -212,13 +226,25 @@ class OnetPlIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    def _search_mvp_id(self, webpage, default=NO_DEFAULT):
+        return self._search_regex(
+            r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage, 'mvp id',
+            default=default)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
-        mvp_id = self._search_regex(
-            r'data-params-mvp=["\'](\d+\.\d+)', webpage, 'mvp id')
+        mvp_id = self._search_mvp_id(webpage, default=None)
+
+        if not mvp_id:
+            pulsembed_url = self._search_regex(
+                r'data-src=(["\'])(?P<url>(?:https?:)?//pulsembed\.eu/.+?)\1',
+                webpage, 'pulsembed url', group='url')
+            webpage = self._download_webpage(
+                pulsembed_url, video_id, 'Downloading pulsembed webpage')
+            mvp_id = self._search_mvp_id(webpage)
 
         return self.url_result(
             'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)