youtube-dl/youtube_dl/extractor/vevo.py

import re
import json
import xml.etree.ElementTree
import datetime

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    ExtractorError,
)


class VevoIE(InfoExtractor):
    """
    Accepts urls from vevo.com or in the format 'vevo:{id}'
    (currently used by MTVIE)
    """
    # The dots of the host name are escaped so that they only match a
    # literal '.', not an arbitrary character.
    _VALID_URL = r'((http://www\.vevo\.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
    _TEST = {
        u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
        u'file': u'GB1101300280.mp4',
        u'info_dict': {
            u"upload_date": u"20130624",
            u"uploader": u"Hurts",
            u"title": u"Somebody to Die For",
            u'duration': 230,
        }
    }

    def _real_extract(self, url):
        """
        Build the info dictionary for the video referenced by ``url``.

        The video id is taken from the ``id`` group of ``_VALID_URL``; the
        metadata is downloaded as JSON from the AuthenticateVideo service.
        Among the entries of ``videoVersions`` whose ``sourceType`` is 2,
        the one with the highest ``version`` is used, and every
        ``rendition`` element of its XML ``data`` becomes one format.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``thumbnail``, ``upload_date`` (``YYYYMMDD``, in UTC), ``uploader``
        and ``duration``.

        Raises ExtractorError if no version with ``sourceType`` 2 is found.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
        info_json = self._download_webpage(json_url, video_id, u'Downloading json info')

        self.report_extraction(video_id)
        video_info = json.loads(info_json)['video']

        # Sentinel entry: any real version number greater than -1 replaces it.
        last_version = {'version': -1}
        for version in video_info['videoVersions']:
            # These are the HTTP downloads, other types are for different manifests
            if (version['sourceType'] == 2
                    and version['version'] > last_version['version']):
                last_version = version
        if last_version['version'] == -1:
            raise ExtractorError(u'Unable to extract last version of the video')

        renditions = xml.etree.ElementTree.fromstring(last_version['data'])
        formats = []
        # Already sorted from worst to best quality
        for rend in renditions.findall('rendition'):
            attr = rend.attrib
            format_note = '%(videoCodec)s@%(videoBitrate)4sK, %(audioCodec)s@%(audioBitrate)3sK' % attr
            formats.append({
                'url': attr['url'],
                'format_id': attr['name'],
                'format_note': format_note,
                'height': int(attr['frameheight']),
                'width': int(attr['frameWidth']),
            })

        # 'launchDate' looks like '/Date(<milliseconds since the epoch>)/'.
        timestamp_ms = int(self._search_regex(
            r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
        # Convert in UTC: datetime.fromtimestamp() would use the local
        # timezone of the machine, so the resulting day could differ from one
        # host to another. Plain epoch arithmetic works on Python 2 and 3.
        upload_date = (
            datetime.datetime(1970, 1, 1)
            + datetime.timedelta(seconds=timestamp_ms // 1000))

        info = {
            'id': video_id,
            'title': video_info['title'],
            'formats': formats,
            'thumbnail': video_info['imageUrl'],
            'upload_date': upload_date.strftime('%Y%m%d'),
            'uploader': video_info['mainArtists'][0]['artistName'],
            'duration': video_info['duration'],
        }

        return info
Add VevoIE 11 years ago			`import re`
			`import json`
[vevo] Some improvements (fixes #1580) Extract the info from http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc={id} Some videos don't have an smil manifest, extract the video urls directly from the json and use the last version of the video. Extract all the available formats and set the 'formats' field of the result 11 years ago			`import xml.etree.ElementTree`
			`import datetime`
Add VevoIE 11 years ago
			`from .common import InfoExtractor`
			`from ..utils import (`
[vevo] Some improvements (fixes #1580) Extract the info from http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc={id} Some videos don't have an smil manifest, extract the video urls directly from the json and use the last version of the video. Extract all the available formats and set the 'formats' field of the result 11 years ago			`determine_ext,`
Add VevoIE 11 years ago			`ExtractorError,`
			`)`

[vevo] Some improvements (fixes #1580) Extract the info from http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc={id} Some videos don't have an smil manifest, extract the video urls directly from the json and use the last version of the video. Extract all the available formats and set the 'formats' field of the result 11 years ago
Add VevoIE 11 years ago			`class VevoIE(InfoExtractor):`
MTVIE: add support for Vevo videos (related #913) 11 years ago			`"""`
[vevo] fix testcase 11 years ago			`Accepts urls from vevo.com or in the format 'vevo:{id}'`
MTVIE: add support for Vevo videos (related #913) 11 years ago			`(currently used by MTVIE)`
			`"""`
[vevo] Fix urls with a query (#1258) 11 years ago			`_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'`
Move tests to the IE definitions 11 years ago			`_TEST = {`
			`u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',`
			`u'file': u'GB1101300280.mp4',`
			`u'info_dict': {`
[vevo] Fix urls with a query (#1258) 11 years ago			`u"upload_date": u"20130624",`
			`u"uploader": u"Hurts",`
[vevo] Some improvements (fixes #1580) Extract the info from http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc={id} Some videos don't have an smil manifest, extract the video urls directly from the json and use the last version of the video. Extract all the available formats and set the 'formats' field of the result 11 years ago			`u"title": u"Somebody to Die For",`
			`u'duration': 230,`
Move tests to the IE definitions 11 years ago			`}`
			`}`
Add VevoIE 11 years ago
			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`video_id = mobj.group('id')`

[vevo] Some improvements (fixes #1580) Extract the info from http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc={id} Some videos don't have an smil manifest, extract the video urls directly from the json and use the last version of the video. Extract all the available formats and set the 'formats' field of the result 11 years ago			`json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id`
Add VevoIE 11 years ago			`info_json = self._download_webpage(json_url, video_id, u'Downloading json info')`

			`self.report_extraction(video_id)`
[vevo] Some improvements (fixes #1580) Extract the info from http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc={id} Some videos don't have an smil manifest, extract the video urls directly from the json and use the last version of the video. Extract all the available formats and set the 'formats' field of the result 11 years ago			`video_info = json.loads(info_json)['video']`
			`last_version = {'version': -1}`
			`for version in video_info['videoVersions']:`
			`# These are the HTTP downloads, other types are for different manifests`
			`if version['sourceType'] == 2:`
			`if version['version'] > last_version['version']:`
			`last_version = version`
			`if last_version['version'] == -1:`
			`raise ExtractorError(u'Unable to extract last version of the video')`

			`renditions = xml.etree.ElementTree.fromstring(last_version['data'])`
			`formats = []`
			`# Already sorted from worst to best quality`
			`for rend in renditions.findall('rendition'):`
			`attr = rend.attrib`
[vevo] Add more format details 11 years ago			`format_note = '%(videoCodec)s@%(videoBitrate)4sK, %(audioCodec)s@%(audioBitrate)3sK' % attr`
[vevo] Some improvements (fixes #1580) Extract the info from http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc={id} Some videos don't have an smil manifest, extract the video urls directly from the json and use the last version of the video. Extract all the available formats and set the 'formats' field of the result 11 years ago			`formats.append({`
[vevo] Add more format details 11 years ago			`'url': attr['url'],`
			`'format_id': attr['name'],`
			`'format_note': format_note,`
[vevo] Some improvements (fixes #1580) Extract the info from http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc={id} Some videos don't have an smil manifest, extract the video urls directly from the json and use the last version of the video. Extract all the available formats and set the 'formats' field of the result 11 years ago			`'height': int(attr['frameheight']),`
			`'width': int(attr['frameWidth']),`
			`})`

[vevo] Fix timestamp handling ( / 1000 is implicit float division ) 11 years ago			`timestamp_ms = int(self._search_regex(`
			`r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))`
			`upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)`
[vevo] Some improvements (fixes #1580) Extract the info from http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc={id} Some videos don't have an smil manifest, extract the video urls directly from the json and use the last version of the video. Extract all the available formats and set the 'formats' field of the result 11 years ago			`info = {`
			`'id': video_id,`
			`'title': video_info['title'],`
			`'formats': formats,`
			`'thumbnail': video_info['imageUrl'],`
			`'upload_date': upload_date.strftime('%Y%m%d'),`
			`'uploader': video_info['mainArtists'][0]['artistName'],`
			`'duration': video_info['duration'],`
			`}`

			`return info`