From 7de0e5ad8017d4d061b6b715dd487cae5a0865d1 Mon Sep 17 00:00:00 2001 From: nixxo Date: Sat, 19 Sep 2020 09:07:41 +0200 Subject: [PATCH 1/6] [skyitalia] Add new extractor --- youtube_dl/extractor/extractors.py | 6 ++ youtube_dl/extractor/skyitalia.py | 146 +++++++++++++++++++++++++++++ 2 files changed, 152 insertions(+) create mode 100644 youtube_dl/extractor/skyitalia.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ae7079a6a..11b0ac1a8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1016,6 +1016,12 @@ from .sky import ( SkyNewsIE, SkySportsIE, ) +from .skyitalia import ( + SkyVideoItIE, + SkySportItIE, + SkyTg24ItIE, + SkyArteItIE +) from .slideshare import SlideshareIE from .slideslive import SlidesLiveIE from .slutload import SlutloadIE diff --git a/youtube_dl/extractor/skyitalia.py b/youtube_dl/extractor/skyitalia.py new file mode 100644 index 000000000..c5c58880d --- /dev/null +++ b/youtube_dl/extractor/skyitalia.py @@ -0,0 +1,146 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class SkyItaliaBaseIE(InfoExtractor): + _GET_VIDEO_DATA = 'https://apid.sky.it/vdp/v1/getVideoData?token={token}&caller=sky&rendition=web&id={id}' + _TOKEN = 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk' + _RES = { + 'low': [426, 240], + 'med': [640, 360], + 'high': [854, 480], + 'hd': [1280, 720] + } + + def _extract_video_id(self, url): + webpage = self._download_webpage(url, 'skysport') + video_id = self._html_search_regex( + [r'data-videoid=\"(\d+)\"', + r'http://player\.sky\.it/social\?id=(\d+)\&'], + webpage, 'video_id') + if video_id: + return video_id + raise ExtractorError('Video not found') + + def _get_formats(self, video_id, token=_TOKEN): + print(token) + data_url = self._GET_VIDEO_DATA.replace('{id}', video_id) + data_url = data_url.replace('{token}', token) + print(data_url) + video_data = self._parse_json( + self._download_webpage(data_url, video_id), + video_id + ) + + formats = [] + for q, r in self._RES.items(): + key = 'web_' + q + '_url' + if key not in video_data: + continue + formats.append({ + 'url': video_data[key], + 'format_id': q, + 'width': r[0], + 'height': r[1] + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_data['title'], + 'thumbnail': video_data['thumb'], + 'formats': formats + } + + +class SkyVideoItIE(SkyItaliaBaseIE): + IE_NAME = 'video.sky.it' + _VALID_URL = r'https?://video\.sky\.it/[0-9a-z-/]+-(?P[0-9]{6})(?:$|\?)' + + _TESTS = [ + { + 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162', + 'md5': '9c03b590b06e5952d8051f0e02b0feca', + 'info_dict': { + 'id': '616162', + 'ext': 'mp4', + 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg', + } + }, + { + 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162?itm_source=parsely-api', + 'only_matching': True, + } + ] + + def _real_extract(self, url): + return self._get_formats(self._match_id(url)) + + +class SkySportItIE(SkyItaliaBaseIE): + IE_NAME = 'sport.sky.it' + _VALID_URL = r'https?://sport\.sky\.it/.+?$' + + _TESTS = [ + { + 'url': 'https://sport.sky.it/motogp/2020/09/18/motogp-gp-emilia-romagna-misano-2020-prove-libere-diretta', + 'md5': '9c03b590b06e5952d8051f0e02b0feca', + 'info_dict': { + 'id': '616162', + 'ext': 'mp4', + 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg', + } + } + ] + + def _real_extract(self, url): + return self._get_formats(self._extract_video_id(url)) + + +class SkyTg24ItIE(SkyItaliaBaseIE): + IE_NAME = 'tg24.sky.it' + _VALID_URL = r'https?://tg24\.sky\.it/.+?$' + + _TESTS = [ + { + 'url': 'https://tg24.sky.it/salute-e-benessere/2020/09/18/coronavirus-vaccino-ue-sanofi', + 'md5': 'caa25e62dadb529bc5e0b078da99f854', + 'info_dict': { + 'id': '615904', + 'ext': 'mp4', + 'title': 'Covid-19, al Buzzi di Milano tamponi drive-in per studenti', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/17/1600351405841_error-coronavirus-al-buzzi-di-milano-tamponi_thumbnail_1.jpg', + } + } + ] + + def _real_extract(self, url): + return self._get_formats(self._extract_video_id(url)) + + +class SkyArteItIE(SkyItaliaBaseIE): + IE_NAME = 'arte.sky.it' + _VALID_URL = r'https?://arte\.sky\.it/video/.+?$' + + _TESTS = [ + { + 'url': 'https://arte.sky.it/video/federico-fellini-maestri-cinema/', + 'md5': '2f22513a89f45142f2746f878d690647', + 'info_dict': { + 'id': '612888', + 'ext': 'mp4', + 'title': 'I maestri del cinema Federico Felini', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/03/1599146747305_i-maestri-del-cinema-federico-felini_thumbnail_1.jpg', + } + } + ] + _TOKEN = 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd' + + def _real_extract(self, url): + return self._get_formats(self._extract_video_id(url), self._TOKEN) From e26f3ccf87ab0b9ec73b531826a6156793f8d635 Mon Sep 17 00:00:00 2001 From: nixxo Date: Sat, 19 Sep 2020 09:11:19 +0200 Subject: [PATCH 2/6] [skyitalia] fixed a string --- youtube_dl/extractor/skyitalia.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/skyitalia.py b/youtube_dl/extractor/skyitalia.py index c5c58880d..929becda1 100644 --- a/youtube_dl/extractor/skyitalia.py +++ b/youtube_dl/extractor/skyitalia.py @@ -16,7 +16,7 @@ class SkyItaliaBaseIE(InfoExtractor): } def _extract_video_id(self, url): - webpage = self._download_webpage(url, 'skysport') + webpage = self._download_webpage(url, 'skyitalia') video_id = self._html_search_regex( [r'data-videoid=\"(\d+)\"', r'http://player\.sky\.it/social\?id=(\d+)\&'], From d664205e0ff9a2684da35a96a14dde57f464bf4a Mon Sep 17 00:00:00 2001 From: nixxo Date: Sat, 19 Sep 2020 13:07:57 +0200 Subject: [PATCH 3/6] removed BOM from file --- youtube_dl/extractor/skyitalia.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/skyitalia.py b/youtube_dl/extractor/skyitalia.py index 929becda1..a7d641ee1 100644 --- a/youtube_dl/extractor/skyitalia.py +++ b/youtube_dl/extractor/skyitalia.py @@ -1,4 +1,4 @@ -# coding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor From 0429619162f18ca0f4907053fc3ba7661e0e52f3 Mon Sep 17 00:00:00 2001 From: nixxo Date: Sat, 19 Sep 2020 13:19:49 +0200 Subject: [PATCH 4/6] [skyitalia] removed unnecessary outputs --- youtube_dl/extractor/skyitalia.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/skyitalia.py b/youtube_dl/extractor/skyitalia.py index a7d641ee1..6f4fd3819 100644 --- a/youtube_dl/extractor/skyitalia.py +++ b/youtube_dl/extractor/skyitalia.py @@ -26,10 +26,8 @@ class SkyItaliaBaseIE(InfoExtractor): raise ExtractorError('Video not found') def _get_formats(self, video_id, token=_TOKEN): - print(token) data_url = self._GET_VIDEO_DATA.replace('{id}', video_id) data_url = data_url.replace('{token}', token) - print(data_url) video_data = self._parse_json( self._download_webpage(data_url, video_id), video_id From 88515d1f526e9da58d81dbeecdd0973c9c27e583 Mon Sep 17 00:00:00 2001 From: nixxo Date: Sat, 19 Sep 2020 13:24:09 +0200 Subject: [PATCH 5/6] [skyitalia] improved indentation --- youtube_dl/extractor/skyitalia.py | 88 ++++++++++++++----------------- 1 file changed, 40 insertions(+), 48 deletions(-) diff --git a/youtube_dl/extractor/skyitalia.py b/youtube_dl/extractor/skyitalia.py index 6f4fd3819..4d5b0bbd3 100644 --- a/youtube_dl/extractor/skyitalia.py +++ b/youtube_dl/extractor/skyitalia.py @@ -59,22 +59,19 @@ class SkyVideoItIE(SkyItaliaBaseIE): IE_NAME = 'video.sky.it' _VALID_URL = r'https?://video\.sky\.it/[0-9a-z-/]+-(?P[0-9]{6})(?:$|\?)' - _TESTS = [ - { - 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162', - 'md5': '9c03b590b06e5952d8051f0e02b0feca', - 'info_dict': { - 'id': '616162', - 'ext': 'mp4', - 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere', - 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg', - } - }, - { - 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162?itm_source=parsely-api', - 'only_matching': True, + _TESTS = [{ + 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162', + 'md5': '9c03b590b06e5952d8051f0e02b0feca', + 'info_dict': { + 'id': '616162', + 'ext': 'mp4', + 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg', } - ] + }, { + 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162?itm_source=parsely-api', + 'only_matching': True, + }] def _real_extract(self, url): return self._get_formats(self._match_id(url)) @@ -84,18 +81,16 @@ class SkySportItIE(SkyItaliaBaseIE): IE_NAME = 'sport.sky.it' _VALID_URL = r'https?://sport\.sky\.it/.+?$' - _TESTS = [ - { - 'url': 'https://sport.sky.it/motogp/2020/09/18/motogp-gp-emilia-romagna-misano-2020-prove-libere-diretta', - 'md5': '9c03b590b06e5952d8051f0e02b0feca', - 'info_dict': { - 'id': '616162', - 'ext': 'mp4', - 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere', - 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg', - } + _TESTS = [{ + 'url': 'https://sport.sky.it/motogp/2020/09/18/motogp-gp-emilia-romagna-misano-2020-prove-libere-diretta', + 'md5': '9c03b590b06e5952d8051f0e02b0feca', + 'info_dict': { + 'id': '616162', + 'ext': 'mp4', + 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg', } - ] + }] def _real_extract(self, url): return self._get_formats(self._extract_video_id(url)) @@ -105,18 +100,16 @@ class SkyTg24ItIE(SkyItaliaBaseIE): IE_NAME = 'tg24.sky.it' _VALID_URL = r'https?://tg24\.sky\.it/.+?$' - _TESTS = [ - { - 'url': 'https://tg24.sky.it/salute-e-benessere/2020/09/18/coronavirus-vaccino-ue-sanofi', - 'md5': 'caa25e62dadb529bc5e0b078da99f854', - 'info_dict': { - 'id': '615904', - 'ext': 'mp4', - 'title': 'Covid-19, al Buzzi di Milano tamponi drive-in per studenti', - 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/17/1600351405841_error-coronavirus-al-buzzi-di-milano-tamponi_thumbnail_1.jpg', - } + _TESTS = [{ + 'url': 'https://tg24.sky.it/salute-e-benessere/2020/09/18/coronavirus-vaccino-ue-sanofi', + 'md5': 'caa25e62dadb529bc5e0b078da99f854', + 'info_dict': { + 'id': '615904', + 'ext': 'mp4', + 'title': 'Covid-19, al Buzzi di Milano tamponi drive-in per studenti', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/17/1600351405841_error-coronavirus-al-buzzi-di-milano-tamponi_thumbnail_1.jpg', } - ] + }] def _real_extract(self, url): return self._get_formats(self._extract_video_id(url)) @@ -126,18 +119,17 @@ class SkyArteItIE(SkyItaliaBaseIE): IE_NAME = 'arte.sky.it' _VALID_URL = r'https?://arte\.sky\.it/video/.+?$' - _TESTS = [ - { - 'url': 'https://arte.sky.it/video/federico-fellini-maestri-cinema/', - 'md5': '2f22513a89f45142f2746f878d690647', - 'info_dict': { - 'id': '612888', - 'ext': 'mp4', - 'title': 'I maestri del cinema Federico Felini', - 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/03/1599146747305_i-maestri-del-cinema-federico-felini_thumbnail_1.jpg', - } + _TESTS = [{ + 'url': 'https://arte.sky.it/video/federico-fellini-maestri-cinema/', + 'md5': '2f22513a89f45142f2746f878d690647', + 'info_dict': { + 'id': '612888', + 'ext': 'mp4', + 'title': 'I maestri del cinema Federico Felini', + 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/03/1599146747305_i-maestri-del-cinema-federico-felini_thumbnail_1.jpg', } - ] + }] + _TOKEN = 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd' def _real_extract(self, url): From 6b427abc4dc5988a9baf0fde94b9b57c521aa174 Mon Sep 17 00:00:00 2001 From: nixxo Date: Sun, 20 Sep 2020 10:33:26 +0200 Subject: [PATCH 6/6] [skyitalia] moved _real_extract --- youtube_dl/extractor/skyitalia.py | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/skyitalia.py b/youtube_dl/extractor/skyitalia.py index 4d5b0bbd3..c01072e43 100644 --- a/youtube_dl/extractor/skyitalia.py +++ b/youtube_dl/extractor/skyitalia.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ExtractorError @@ -54,6 +56,12 @@ class SkyItaliaBaseIE(InfoExtractor): 'formats': formats } + def _real_extract(self, url): + video_id = self._match_id(url) + if not re.match(r'^\d+$', video_id): + video_id = self._extract_video_id(url) + return self._get_formats(video_id, self._TOKEN) + class SkyVideoItIE(SkyItaliaBaseIE): IE_NAME = 'video.sky.it' @@ -73,14 +81,10 @@ class SkyVideoItIE(SkyItaliaBaseIE): 'only_matching': True, }] - def _real_extract(self, url): - return self._get_formats(self._match_id(url)) - class SkySportItIE(SkyItaliaBaseIE): IE_NAME = 'sport.sky.it' - _VALID_URL = r'https?://sport\.sky\.it/.+?$' - + _VALID_URL = r'https?://sport\.sky\.it/(?P.+?)$' _TESTS = [{ 'url': 'https://sport.sky.it/motogp/2020/09/18/motogp-gp-emilia-romagna-misano-2020-prove-libere-diretta', 'md5': '9c03b590b06e5952d8051f0e02b0feca', @@ -92,14 +96,10 @@ class SkySportItIE(SkyItaliaBaseIE): } }] - def _real_extract(self, url): - return self._get_formats(self._extract_video_id(url)) - class SkyTg24ItIE(SkyItaliaBaseIE): IE_NAME = 'tg24.sky.it' - _VALID_URL = r'https?://tg24\.sky\.it/.+?$' - + _VALID_URL = r'https?://tg24\.sky\.it/(?P.+?)$' _TESTS = [{ 'url': 'https://tg24.sky.it/salute-e-benessere/2020/09/18/coronavirus-vaccino-ue-sanofi', 'md5': 'caa25e62dadb529bc5e0b078da99f854', @@ -111,14 +111,10 @@ class SkyTg24ItIE(SkyItaliaBaseIE): } }] - def _real_extract(self, url): - return self._get_formats(self._extract_video_id(url)) - class SkyArteItIE(SkyItaliaBaseIE): IE_NAME = 'arte.sky.it' - _VALID_URL = r'https?://arte\.sky\.it/video/.+?$' - + _VALID_URL = r'https?://arte\.sky\.it/video/(?P.+?)$' _TESTS = [{ 'url': 'https://arte.sky.it/video/federico-fellini-maestri-cinema/', 'md5': '2f22513a89f45142f2746f878d690647', @@ -129,8 +125,4 @@ class SkyArteItIE(SkyItaliaBaseIE): 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/03/1599146747305_i-maestri-del-cinema-federico-felini_thumbnail_1.jpg', } }] - _TOKEN = 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd' - - def _real_extract(self, url): - return self._get_formats(self._extract_video_id(url), self._TOKEN)