From 87f50e3feb609f8fc04a378651df87d82a8fc11c Mon Sep 17 00:00:00 2001 From: Tristan Waddington Date: Sat, 7 Mar 2020 15:20:41 -0800 Subject: [PATCH] Split Thumbzilla out into its own extractor. This simplifies the pornhub extractor and makes it easier to maintain in the future. --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/pornhub.py | 8 +--- youtube_dl/extractor/thumbzilla.py | 61 ++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 7 deletions(-) create mode 100644 youtube_dl/extractor/thumbzilla.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 64d1fa251..7f1c9b24d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1129,6 +1129,7 @@ from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE from .thisoldhouse import ThisOldHouseIE from .threeqsdn import ThreeQSDNIE +from .thumbzilla import ThumbzillaIE from .tiktok import ( TikTokIE, TikTokUserIE, diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index b8f65af7c..24da75298 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -51,10 +51,7 @@ class PornHubIE(PornHubBaseIE): IE_DESC = 'PornHub and Thumbzilla' _VALID_URL = r'''(?x) https?:// - (?: - (?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| - (?:www\.)?thumbzilla\.com/video/ - ) + (?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/) (?P<id>[\da-z]+) ''' _TESTS = [{ @@ -140,9 +137,6 @@ class PornHubIE(PornHubBaseIE): # private video 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7', 'only_matching': True, - }, { - 'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex', - 'only_matching': True, }, { 'url': 'http://www.pornhub.com/video/show?viewkey=648719015', 'only_matching': True, diff --git a/youtube_dl/extractor/thumbzilla.py
b/youtube_dl/extractor/thumbzilla.py
new file mode 100644
index 000000000..6a74cf4d9
--- /dev/null
+++ b/youtube_dl/extractor/thumbzilla.py
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from .openload import PhantomJSwrapper
+from .pornhub import PornHubIE
+from ..utils import ExtractorError
+
+
+class ThumbzillaIE(InfoExtractor):
+    """
+    ThumbzillaIE is a frontend for other 'Tube' sites (mostly PornHub). ThumbzillaIE will
+    parse the video and delegate to the appropriate extractor via a url_result.
+    """
+    IE_NAME = 'thumbzilla'
+    IE_DESC = 'Thumbzilla'
+    _VALID_URL = r'https?://(?P<host>(?:www\.)?thumbzilla\.com)/video/(?P<id>[\da-z]+)'
+
+    _TEST = {
+        'url': 'https://www.thumbzilla.com/video/ph5c8e8f15b40ff/hot-skinny-girl-gives-you',
+        'info_dict': {
+            'id': 'ph5c8e8f15b40ff',
+            'ext': 'mp4',
+            'upload_date': '20190317',
+            'age_limit': 18,
+            'uploader': 'lizashultz',
+            'title': 'Hot skinny girl gives you.',
+        }
+    }
+
+    def _download_webpage_handle(self, *args, **kwargs):
+        """
+        Download a page, retrying once via PhantomJS when a challenge page is served.
+
+        Arguments are forwarded unchanged to the parent implementation; the first
+        positional argument is the URL or a Request object. Returns the parent's
+        result: a (webpage, urlh) pair, or its falsy failure value passed through.
+        """
+        def dl(*args, **kwargs):
+            return super(ThumbzillaIE, self)._download_webpage_handle(*args, **kwargs)
+
+        result = dl(*args, **kwargs)
+        # NOTE(review): the parent is expected to return a falsy non-tuple when a
+        # non-fatal download fails; hand it back instead of crashing on the unpacking.
+        if not result:
+            return result
+        webpage, urlh = result
+
+        # Markers of what looks like a JavaScript challenge page: a body onload go()
+        # hook, an RNKEY cookie assignment, or a forced reload.
+        if any(re.search(p, webpage) for p in (
+                r'<body\b[^>]+\bonload=["\']go\(\)',
+                r'document\.cookie\s*=\s*["\']RNKEY=',
+                r'document\.location\.reload\(true\)')):
+            url_or_request = args[0]
+            url = (url_or_request.get_full_url()
+                   if isinstance(url_or_request, compat_urllib_request.Request)
+                   else url_or_request)
+            # Let PhantomJS process the challenge page, then fetch the page again.
+            phantom = PhantomJSwrapper(self, required_version='2.0')
+            phantom.get(url, html=webpage)
+            return dl(*args, **kwargs)
+
+        return webpage, urlh
+
+    def _real_extract(self, url):
+        """
+        Hand a Thumbzilla video URL over to the extractor of the hosting site.
+
+        Ids starting with 'ph' are delegated to PornHubIE via url_result; any other
+        id raises ExtractorError.
+        """
+        video_id = re.match(self._VALID_URL, url).group('id')
+
+        if not video_id.startswith('ph'):
+            # An unsupported source site is not an extractor bug: mark it expected.
+            raise ExtractorError('Unsupported video', expected=True)
+
+        return self.url_result(
+            'https://pornhub.com/view_video.php?viewkey=%s' % video_id,
+            video_id=video_id, ie=PornHubIE.ie_key())