From dcb3c22e0beedd7e6ee7ca2cc988a5c9b80d5f11 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 7 Jan 2012 01:30:30 +0100 Subject: [PATCH] MTV IE --- youtube_dl/__init__.py | 95 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 818a3a73a..f6d6e7515 100755 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -3884,6 +3884,100 @@ class StanfordOpenClassroomIE(InfoExtractor): assert entry['type'] == 'reference' self.extract(entry['url']) +class MTVIE(InfoExtractor): + """Information extractor for MTV.com""" + + _VALID_URL = r'^(?Phttps?://)?(?:www\.)?mtv\.com/videos/[^/]+/(?P[0-9]+)/[^/]+$' + IE_NAME = u'mtv' + + def report_webpage(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id)) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + if not mobj.group('proto'): + url = 'http://' + url + video_id = mobj.group('videoid') + self.report_webpage(video_id) + + request = urllib2.Request(url) + try: + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + return + + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract song name') + return + song_name = _unescapeHTML(mobj.group(1).decode('iso-8859-1')) + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract performer') + return + performer = _unescapeHTML(mobj.group(1).decode('iso-8859-1')) + video_title = performer + ' - ' + song_name + + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to mtvn_uri') + return + mtvn_uri = mobj.group(1) + + mobj = re.search(r'MTVN.Player.defaultPlaylistId = ([0-9]+);', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract content id') + return + content_id = mobj.group(1) + + videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri + self.report_extraction(video_id) + request = urllib2.Request(videogen_url) + try: + metadataXml = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % str(err)) + return + + mdoc = xml.etree.ElementTree.fromstring(metadataXml) + renditions = mdoc.findall('.//rendition') + + # For now, always pick the highest quality. + rendition = renditions[-1] + + try: + _,_,ext = rendition.attrib['type'].partition('/') + format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate'] + video_url = rendition.find('./src').text + except KeyError: + self._downloader.trouble('Invalid rendition field.') + return + + self._downloader.increment_downloads() + info = { + 'id': video_id, + 'url': video_url, + 'uploader': performer, + 'title': video_title, + 'stitle': _simplify_title(video_title), + 'ext': ext, + 'format': format, + } + + try: + self._downloader.process_info(info) + except UnavailableVideoError, err: + self._downloader.trouble(u'\nERROR: unable to download ' + video_id) + class PostProcessor(object): """Post Processor class. @@ -4336,6 +4430,7 @@ def gen_extractors(): InfoQIE(), MixcloudIE(), StanfordOpenClassroomIE(), + MTVIE(), GenericIE() ]