From c63f5fb8633dda1b0a673c90d537e93497a8d62d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 19 Jul 2018 01:59:00 +0700 Subject: [PATCH] [slutload] Fix and improve extraction (closes #17001) --- youtube_dl/extractor/slutload.py | 57 +++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py index 6fc2ff60d..661f9e59d 100644 --- a/youtube_dl/extractor/slutload.py +++ b/youtube_dl/extractor/slutload.py @@ -1,12 +1,10 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor class SlutloadIE(InfoExtractor): - _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P[^/]+)/?$' + _VALID_URL = r'https?://(?:\w+\.)?slutload\.com/(?:video/[^/]+|embed_player|watch)/(?P[^/]+)' _TESTS = [{ 'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', 'md5': '868309628ba00fd488cf516a113fd717', @@ -16,33 +14,52 @@ class SlutloadIE(InfoExtractor): 'title': 'virginie baisee en cam', 'age_limit': 18, 'thumbnail': r're:https?://.*?\.jpg' - } + }, }, { # mobile site 'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/', 'only_matching': True, + }, { + 'url': 'http://www.slutload.com/embed_player/TD73btpBqSxc/', + 'only_matching': True, + }, { + 'url': 'http://www.slutload.com/watch/TD73btpBqSxc/Virginie-Baisee-En-Cam.html', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) - desktop_url = re.sub(r'^(https?://)mobile\.', r'\1', url) - webpage = self._download_webpage(desktop_url, video_id) + embed_page = self._download_webpage( + 'http://www.slutload.com/embed_player/%s' % video_id, video_id, + 'Downloading embed page', fatal=False) - video_title = self._html_search_regex(r'

([^<]+)', - webpage, 'title').strip() + if embed_page: + def extract(what): + return self._html_search_regex( + r'data-video-%s=(["\'])(?P(?:(?!\1).)+)\1' % what, + embed_page, 'video %s' % what, default=None, group='url') - video_url = self._html_search_regex( - r'(?s)
([^<]+)', embed_page, 'title', default=video_id) + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'thumbnail': extract('preview'), + 'age_limit': 18 + } - return { + webpage = self._download_webpage( + 'http://www.slutload.com/video/_/%s/' % video_id, video_id) + title = self._html_search_regex( + r'

([^<]+)', webpage, 'title').strip() + info = self._parse_html5_media_entries(url, webpage, video_id)[0] + info.update({ 'id': video_id, - 'url': video_url, - 'title': video_title, - 'thumbnail': thumbnail, - 'age_limit': 18 - } + 'title': title, + 'age_limit': 18, + }) + return info