From 2a2d109acd47ae3caecabd3c538ecbedc6f8b7cf Mon Sep 17 00:00:00 2001
From: Fran Hermoso
Date: Tue, 12 May 2020 16:54:29 +0200
Subject: [PATCH] [Doramasmp4] Add new extractor

---
 youtube_dl/extractor/doramasmp4.py | 129 +++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |   1 +
 2 files changed, 130 insertions(+)
 create mode 100644 youtube_dl/extractor/doramasmp4.py

diff --git a/youtube_dl/extractor/doramasmp4.py b/youtube_dl/extractor/doramasmp4.py
new file mode 100644
index 000000000..67cba9d3c
--- /dev/null
+++ b/youtube_dl/extractor/doramasmp4.py
@@ -0,0 +1,129 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import clean_html, ExtractorError
+
+
+class Doramasmp4IE(InfoExtractor):
+    """Extractor for doramasmp4.com episode pages and series playlists."""
+
+    # Accept any "www<N>." mirror prefix and keep query strings and
+    # fragments out of the id.
+    _VALID_URL = r'https?://(?:www\d*\.)?doramasmp4\.com/(?P<id>[^/?#]+)'
+    _TESTS = [
+        {
+            'url': 'https://www8.doramasmp4.com/the-man-inside-me/',
+            'info_dict': {
+                'id': 'the-man-inside-me',
+                'title': 'The Man Inside Me',
+                'ext': 'mp4'
+            }
+        },
+        {
+            'url': 'https://www8.doramasmp4.com/the-painter-of-the-wind-capitulo-1/',
+            'info_dict': {
+                'id': 'the-painter-of-the-wind-capitulo-1',
+                'title': 'The Painter of the Wind Capítulo 1 sub español',
+                'ext': 'mp4'
+            }
+        },
+        {
+            'url': 'https://www8.doramasmp4.com/princess-silver-capitulo-1/',
+            'info_dict': {
+                'id': 'princess-silver-capitulo-1',
+                'title': 'Princess Silver Capítulo 1 sub español',
+                'ext': 'mp4'
+            }
+        },
+        {
+            'url': 'https://www8.doramasmp4.com/the-painter-of-the-wind/',
+            'info_dict': {
+                'id': 'the-painter-of-the-wind',
+            },
+            'playlist_count': 20
+        },
+        {
+            'url': 'https://www8.doramasmp4.com/princess-silver/',
+            'info_dict': {
+                'id': 'princess-silver',
+            },
+            'playlist_count': 58
+        }
+    ]
+
+    def _find_sources(self, content, video_id):
+        """Return the ``file`` URL of the first ``var sources`` entry.
+
+        ``content`` is the HTML of a player page that assigns a JSON list
+        to ``var sources``. Raises ExtractorError if the first entry has no
+        ``file`` value.
+        """
+        videos = self._parse_json(
+            self._html_search_regex(
+                r'var sources = (?P<url>.*);', content, 'url'
+            ), video_id
+        )
+        video_url = next(iter(videos), {}).get('file')
+        if not video_url:
+            raise ExtractorError('Unable to find a video source')
+        return video_url
+
+    def _extract_playlist(self, webpage, playlist_id, title):
+        """Return a playlist of every link whose href contains the id."""
+        # re.escape() keeps metacharacters in the id literal, and [^"]
+        # stops a match from running past the end of the href attribute.
+        episode_urls = re.findall(
+            r'<a\s+(?:[^>]*?\s+)?href="([^"]+?%s[^"]*)"'
+            % re.escape(playlist_id),
+            webpage
+        )
+        entries = [
+            self.url_result(episode_url, ie=self.ie_key())
+            for episode_url in episode_urls
+        ]
+        return self.playlist_result(
+            entries, playlist_id=playlist_id, playlist_title=title
+        )
+
+    def _real_extract(self, url):
+        """Extract a series playlist or a single episode from ``url``."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        title = self._og_search_title(webpage).replace(' | Doramasmp4.com', '')
+
+        # _html_search_meta() is non-fatal by default: it returns None
+        # rather than raising when the tag is missing.
+        if self._html_search_meta('article:section', webpage) == 'tv':
+            return self._extract_playlist(webpage, video_id, title)
+
+        original = self._html_search_regex(
+            r'data-link\s*=\s*"(?P<url>.+?)"', webpage, 'url'
+        )
+        first = self._download_webpage(original, video_id)
+        second = self._download_webpage(
+            self._html_search_regex(
+                r'src\s*=\s*"(?P<url>.+?)"', first, 'url'
+            ), video_id, headers={'referer': clean_html(original)}
+        )
+        if 'var sources' in second:
+            # Open the advertised file once to resolve any redirects.
+            video_url = self._request_webpage(
+                self._find_sources(second, video_id), video_id
+            ).geturl()
+        else:
+            # Otherwise the page bounces to another one via JavaScript.
+            third = self._download_webpage(
+                self._html_search_regex(
+                    r'window\.location\.href = \'(?P<url>.*)\'', second, 'url'
+                ), video_id
+            )
+            video_url = self._find_sources(third, video_id)
+
+        return {
+            'title': title,
+            'id': video_id,
+            'url': video_url
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 4b3092028..ab5c61af8 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -293,6 +293,7 @@ from .discoverynetworks import DiscoveryNetworksDeIE
 from .discoveryvr import DiscoveryVRIE
 from .disney import DisneyIE
 from .dispeak import DigitallySpeakingIE
+from .doramasmp4 import Doramasmp4IE
 from .dropbox import DropboxIE
 from .dw import (
     DWIE,