import re

from .common import InfoExtractor


class WorldStarHipHopIE(InfoExtractor):
    """Extractor for worldstarhiphop.com / worldstarcandy.com video pages."""

    # The named group has to be called ``id`` because _real_extract reads it
    # back with m.group('id'); a bare ``(?P.*)`` is not a valid pattern.
    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P<id>.*)'
    IE_NAME = u'WorldStarHipHop'

    def _real_extract(self, url):
        """Build the info dictionary for the video page at *url*.

        Returns a one-element list holding a dict with the keys ``id``,
        ``url``, ``title``, ``thumbnail`` and ``ext``.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

        webpage_src = self._download_webpage(url, video_id)

        # The media location is passed to the page's player via
        # so.addVariable("file", "...").
        video_url = self._search_regex(
            r'so\.addVariable\("file","(.*?)"\)',
            webpage_src, u'video URL')

        # Anything that does not mention mp4 is treated as flv.
        ext = 'mp4' if 'mp4' in video_url else 'flv'

        # NOTE(review): the <title>...</title> delimiters here and the
        # </span> delimiter below are reconstructed; confirm against the
        # live page markup.
        video_title = self._html_search_regex(
            r"<title>(.*)</title>",
            webpage_src, u'title')

        # Getting thumbnail and if not thumbnail sets correct title for
        # WSHH candy video.
        # fatal=False: presumably yields a falsy value instead of raising
        # when the pattern is absent -- the check below relies on that.
        thumbnail = self._html_search_regex(
            r'rel="image_src" href="(.*)" />',
            webpage_src, u'thumbnail', fatal=False)

        if not thumbnail:
            _title = r"""candytitles.*>(.*)</span>"""
            mobj = re.search(_title, webpage_src)
            if mobj is not None:
                video_title = mobj.group(1)

        results = [{
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'thumbnail': thumbnail,
            'ext': ext,
        }]
        return results