From 2ca29f1aafda58fd7fd6db55d8b99eb89e07f2df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 20 May 2017 01:28:42 +0700 Subject: [PATCH] [toypics] Improve and modernize --- youtube_dl/extractor/toypics.py | 34 ++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/toypics.py b/youtube_dl/extractor/toypics.py index 774fa565c..f705a06c9 100644 --- a/youtube_dl/extractor/toypics.py +++ b/youtube_dl/extractor/toypics.py @@ -6,43 +6,48 @@ import re class ToypicsIE(InfoExtractor): - IE_DESC = 'Toypics user profile' - _VALID_URL = r'https?://videos\.toypics\.net/view/(?P[0-9]+)/.*' + IE_DESC = 'Toypics video' + _VALID_URL = r'https?://videos\.toypics\.net/view/(?P[0-9]+)' _TEST = { 'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/', 'md5': '16e806ad6d6f58079d210fe30985e08b', 'info_dict': { 'id': '514', 'ext': 'mp4', - 'title': 'Chance-Bulge\'d, 2', + 'title': "Chance-Bulge'd, 2", 'age_limit': 18, 'uploader': 'kidsune', } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - page = self._download_webpage(url, video_id) - formats = self._parse_html5_media_entries(url, page, video_id)[0]['formats'] + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + formats = self._parse_html5_media_entries( + url, webpage, video_id)[0]['formats'] title = self._html_search_regex([ r']+class=["\']view-video-title[^>]+>([^<]+)([^<]+) - Toypics', - ], page, 'title') - username = self._html_search_regex( - r'More videos from ([^<]+)', page, 'username') + ], webpage, 'title') + + uploader = self._html_search_regex( + r'More videos from ([^<]+)', webpage, 'uploader', + fatal=False) + return { 'id': video_id, 'formats': formats, 'title': title, - 'uploader': username, + 'uploader': uploader, 'age_limit': 18, } class ToypicsUserIE(InfoExtractor): IE_DESC = 'Toypics user profile' - _VALID_URL = r'https?://videos\.toypics\.net/(?P[^/?]+)(?:$|[?#])' + _VALID_URL = r'https?://videos\.toypics\.net/(?!view)(?P[^/?#&]+)' _TEST = { 'url': 'http://videos.toypics.net/Mikey', 'info_dict': { @@ -52,8 +57,7 @@ class ToypicsUserIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - username = mobj.group('username') + username = self._match_id(url) profile_page = self._download_webpage( url, username, note='Retrieving profile page') @@ -72,7 +76,7 @@ class ToypicsUserIE(InfoExtractor): note='Downloading page %d/%d' % (n, page_count)) urls.extend( re.findall( - r']+class=["\']preview[^>]+>\s*]+href="(https?://videos.toypics.net/view/[^"]+)"', + r']+class=["\']preview[^>]+>\s*]+href="(https?://videos\.toypics\.net/view/[^"]+)"', lpage)) return {