from __future__ import unicode_literals import re from .common import InfoExtractor from ..utils import ( int_or_none, parse_duration, remove_end, xpath_element, xpath_text, ) class DigitallySpeakingIE(InfoExtractor): _VALID_URL = r'https?://(?:s?evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P[^.]+)\.xml' _TESTS = [{ # From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface 'url': 'http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml', 'md5': 'a8efb6c31ed06ca8739294960b2dbabd', 'info_dict': { 'id': '840376_BQRC', 'ext': 'mp4', 'title': 'Tenacious Design and The Interface of \'Destiny\'', }, }, { # From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC 'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml', 'only_matching': True, }, { # From http://www.gdcvault.com/play/1013700/Advanced-Material 'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml', 'only_matching': True, }] def _parse_mp4(self, metadata): video_formats = [] video_root = None mp4_video = xpath_text(metadata, './mp4video', default=None) if mp4_video is not None: mobj = re.match(r'(?Phttps?://.*?/).*', mp4_video) video_root = mobj.group('root') if video_root is None: http_host = xpath_text(metadata, 'httpHost', default=None) if http_host: video_root = 'http://%s/' % http_host if video_root is None: # Hard-coded in http://evt.dispeak.com/ubm/gdc/sf16/custom/player2.js # Works for GPUTechConf, too video_root = 'http://s3-2u.digitallyspeaking.com/' formats = metadata.findall('./MBRVideos/MBRVideo') if not formats: return None for a_format in formats: stream_name = xpath_text(a_format, 'streamName', fatal=True) video_path = re.match(r'mp4\:(?P.*)', stream_name).group('path') url = video_root + video_path bitrate = xpath_text(a_format, 'bitrate') tbr = int_or_none(bitrate) vbr = int_or_none(self._search_regex( r'-(\d+)\.mp4', video_path, 'vbr', default=None)) abr = tbr - vbr if tbr and vbr else None video_formats.append({ 'format_id': bitrate, 'url': url, 'tbr': tbr, 'vbr': vbr, 'abr': abr, }) return video_formats def _parse_flv(self, metadata): formats = [] akamai_url = xpath_text(metadata, './akamaiHost', fatal=True) audios = metadata.findall('./audios/audio') for audio in audios: formats.append({ 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, 'play_path': remove_end(audio.get('url'), '.flv'), 'ext': 'flv', 'vcodec': 'none', 'format_id': audio.get('code'), }) slide_video_path = xpath_text(metadata, './slideVideo', fatal=True) formats.append({ 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, 'play_path': remove_end(slide_video_path, '.flv'), 'ext': 'flv', 'format_note': 'slide deck video', 'quality': -2, 'preference': -2, 'format_id': 'slides', }) speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True) formats.append({ 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, 'play_path': remove_end(speaker_video_path, '.flv'), 'ext': 'flv', 'format_note': 'speaker video', 'quality': -1, 'preference': -1, 'format_id': 'speaker', }) return formats def _real_extract(self, url): video_id = self._match_id(url) xml_description = self._download_xml(url, video_id) metadata = xpath_element(xml_description, 'metadata') video_formats = self._parse_mp4(metadata) if video_formats is None: video_formats = self._parse_flv(metadata) return { 'id': video_id, 'formats': video_formats, 'title': xpath_text(metadata, 'title', fatal=True), 'duration': parse_duration(xpath_text(metadata, 'endTime')), 'creator': xpath_text(metadata, 'speaker'), }