From 725ab1196dec83135a29fab7c97c61a34f8ec418 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Tue, 5 May 2020 18:53:15 +0200 Subject: [PATCH] [ninateka] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/ninateka.py | 92 ++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 youtube_dl/extractor/ninateka.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4b3092028..b97e85873 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -729,6 +729,7 @@ from .nick import ( NickRuIE, ) from .niconico import NiconicoIE, NiconicoPlaylistIE +from .ninateka import NinatekaIE from .ninecninemedia import NineCNineMediaIE from .ninegag import NineGagIE from .ninenow import NineNowIE diff --git a/youtube_dl/extractor/ninateka.py b/youtube_dl/extractor/ninateka.py new file mode 100644 index 000000000..1053a20e8 --- /dev/null +++ b/youtube_dl/extractor/ninateka.py @@ -0,0 +1,92 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + js_to_json, +) + + +class NinatekaIE(InfoExtractor): + IE_NAME = 'ninateka' + IE_DESC = 'Ninateka' + _VALID_URL = r'https?://ninateka\.pl/film/(?P[^/\?#]+)' + _TEST = { + 'url': 'https://ninateka.pl/film/dziwne-przygody-kota-filemona-7', + 'md5': 'f39eebfad3a609df9c90a45a3155393d', + 'info_dict': { + 'id': 'dziwne-przygody-kota-filemona-7', + 'ext': 'mp4', + 'title': 'Dziwny świat kota Filemona | Poważne zmartwienie', + 'description': 'Filemon ma kłopot z własnym wyglądem, czy uda mu się z nim uporać?', + } + } + + def decode_url(self, encoded): + xor_val = ord('h') ^ ord(encoded[0]) + return ''.join(chr(ord(c) ^ xor_val) for c in encoded) + + def extract_formats(self, data, video_id, name): + info = self._parse_json(data, video_id, transform_source=js_to_json) + formats = [] + + for source_info in info['sources']: + url = self.decode_url(source_info['src']) + type_ = source_info.get('type') + + if type_ == 'application/vnd.ms-sstr+xml' or url.endswith('/Manifest'): + formats.extend(self._extract_ism_formats( + url, video_id, ism_id='mss-{}'.format(name), fatal=False)) + + elif type_ == 'application/x-mpegURL' or url.endswith('.m3u8'): + formats.extend(self._extract_m3u8_formats( + url, video_id, ext='mp4', m3u8_id='hls-{}'.format(name), fatal=False)) + + elif type_ == 'application/dash+xml' or url.endswith('.mpd'): + formats.extend(self._extract_mpd_formats( + url, video_id, mpd_id='dash-{}'.format(name), fatal=False)) + + elif url.endswith('.f4m'): + formats.extend(self._extract_f4m_formats( + url, video_id, f4m_id='hds-{}'.format(name), fatal=False)) + + else: + formats.append({ + 'format_id': 'direct-{}'.format(name), + 'url': url, + 'ext': determine_ext(url, 'mp4'), + }) + + return formats + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + main = self._search_regex( + r'(?m)(?:var|let|const)\s+playerOptionsWithMainSource\s*=\s*(\{.*?\})\s*;\s*?$', + webpage, 'main source') + formats = self.extract_formats(main, video_id, 'main') + + audiodesc = self._search_regex( + r'(?m)(?:var|let|const)\s+playerOptionsWithAudioDescriptionSource\s*=\s*(\{.*?\})\s*;\s*?$', + webpage, 'audio description', default=None) + if audiodesc: + formats.extend(self.extract_formats(audiodesc, video_id, 'audiodescription')) + + english_ver = self._search_regex( + r'(?m)(?:var|let|const)\s+playerOptionsWithEnglishVersion\s*=\s*(\{.*?\})\s*;\s*?$', + webpage, 'english version', default=None) + if english_ver: + formats.extend(self.extract_formats(english_ver, video_id, 'english')) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'formats': formats, + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + }