From c3ffe39c39ceb5eb9c52f22771f73ec0b99e497b Mon Sep 17 00:00:00 2001
From: Jeff Sharkey
Date: Mon, 6 Jul 2020 22:18:34 -0600
Subject: [PATCH] [realvision] Add extractor for RealVision.

Real Vision examines finance, business and the global economy with
videos and reports that provide real in-depth analysis from real
experts.

Internally they're using Brightcove for content distribution, but we
need to make several requests to discover the Realvision video ID
which looks like an MD5 hash, and then use that to discover the
Brightcove video ID.

Along the way we collect better metadata like published-vs-filmed
dates, and a cleaner thumbnail.

Some videos are free, but others behind the membership wall require
the usage of a cookie jar.
---
 docs/supportedsites.md             |   1 +
 youtube_dl/extractor/extractors.py |   1 +
 youtube_dl/extractor/realvision.py | 108 +++++++++++++++++++++++++++++
 3 files changed, 110 insertions(+)
 create mode 100644 youtube_dl/extractor/realvision.py

diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 35c1050e5..a87597ae3 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -717,6 +717,7 @@
  - **RayWenderlichCourse**
  - **RBMARadio**
  - **RDS**: RDS.ca
+ - **RealVision**
  - **RedBullTV**
  - **RedBullTVRrnContent**
  - **Reddit**
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 4b3092028..41c6ffafd 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -916,6 +916,7 @@ from .raywenderlich import (
 )
 from .rbmaradio import RBMARadioIE
 from .rds import RDSIE
+from .realvision import RealVisionIE
 from .redbulltv import (
     RedBullTVIE,
     RedBullTVRrnContentIE,
diff --git a/youtube_dl/extractor/realvision.py b/youtube_dl/extractor/realvision.py
new file mode 100644
index 000000000..e276319aa
--- /dev/null
+++ b/youtube_dl/extractor/realvision.py
@@ -0,0 +1,108 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from datetime import datetime
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    try_get,
+)
+
+
+class RealVisionIE(InfoExtractor):
+    """Extractor for realvision.com show videos, served through Brightcove."""
+
+    _VALID_URL = r'https?://(?:www\.)?realvision\.com/shows/[^/?#]+/videos/(?P<id>[^/?#]+)'
+
+    _TEST = {
+        'url': 'https://www.realvision.com/shows/the-interview/videos/how-coronavirus-exposed-the-shaky-foundation',
+        'info_dict': {
+            'uploader_id': '3117927975001',
+            'id': '6145338821001',
+            'ext': 'mp4',
+            'title': 'How Coronavirus Exposed the "Shaky Foundation"',
+            'description': 'What happens when an upheaval so massive forces financial markets, governments, and society to rethink how our systems work? Michael Krieger, author of the Liberty Blitzkrieg, joins Real Vision to explain what coronavirus and the response to the outbreak has revealed about the condition of American systems – from financial markets to the health care system. Tracing the story of financial markets and societal trends over the past two decades, Krieger outlines how our systems have been pushed to the brink – focusing on emergency policy responses and the everything bubble. He also provides viewers with potential solutions to the systemic decay that has been brought to the forefront by the coronavirus outbreak.',
+            'thumbnail': 'https://www.realvision.com:443/rv/media/Video/d9b77910037f458497a56654165e459e/thumbnail',
+            'timestamp': 1585371600,
+            'upload_date': '20200312',
+            'release_date': '20200328',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'add_ie': ['BrightcoveNew'],
+    }
+
+    # JSON metadata endpoint, keyed by the Real Vision video ID.
+    REALVISION_URL_TEMPLATE = 'https://www.realvision.com/rv/api/videos/%s?include=videoassets,viewer'
+    # Brightcove player page, keyed by the Brightcove video ID.
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/3117927975001/default_default/index.html?videoId=%s'
+
+    @staticmethod
+    def strdate_from_timestamp(timestamp):
+        """Format a UNIX timestamp in seconds as a UTC 'YYYYMMDD' string.
+
+        Returns None when the timestamp is None or zero.
+        """
+        # Static because callers invoke it on the class; as a plain function
+        # Python 2 would reject the call as an unbound method.
+        if not timestamp:
+            return None
+        return datetime.utcfromtimestamp(timestamp).strftime('%Y%m%d')
+
+    def _real_extract(self, url):
+        """Resolve a Real Vision page URL to its Brightcove video.
+
+        Returns a url_transparent result pointing at the Brightcove player,
+        carrying the Real Vision metadata. Raises ExtractorError when the
+        API response has no assets or no Brightcove ID.
+        """
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        # NOTE(review): the pattern below is empty in this copy of the patch;
+        # it was presumably lost in transit. Restore the expression that
+        # captures the Real Vision video ID before merging.
+        rv_id = self._search_regex(
+            r'',
+            webpage, 'rv_id', fatal=True)
+        meta = self._download_json(self.REALVISION_URL_TEMPLATE % rv_id, rv_id)
+
+        included = meta.get('included')
+        if not included:
+            raise ExtractorError(
+                'No assets for %s; missing cookiefile with paid subscription?' % rv_id,
+                expected=True)
+
+        # Index inside the getter so an unexpected "included" shape yields
+        # the friendly error below instead of a raw KeyError/TypeError.
+        bc_id = try_get(
+            included, lambda x: x[0]['attributes']['videoassets_brightcove_id'])
+        if not bc_id:
+            raise ExtractorError('No Brightcove assets for %s.' % rv_id, expected=True)
+
+        attrs = try_get(meta, lambda x: x['data']['attributes'], dict) or {}
+        # The API values are divided by 1000 here; presumably milliseconds.
+        filmed_timestamp = int_or_none(attrs.get('video_filmed_on'), scale=1000)
+        published_timestamp = int_or_none(attrs.get('video_published_on'), scale=1000)
+
+        return {
+            '_type': 'url_transparent',
+            'url': self.BRIGHTCOVE_URL_TEMPLATE % bc_id,
+            'ie_key': 'BrightcoveNew',
+            'display_id': display_id,
+            'title': attrs.get('video_title'),
+            'description': attrs.get('video_description'),
+            'thumbnail': try_get(meta, lambda x: x['data']['links']['thumbnail']),
+            'timestamp': published_timestamp,
+            # upload_date carries the filming date; release_date carries
+            # the publication date.
+            'upload_date': RealVisionIE.strdate_from_timestamp(filmed_timestamp),
+            'release_date': RealVisionIE.strdate_from_timestamp(published_timestamp),
+            'like_count': int_or_none(attrs.get('video_likes_count')),
+            'dislike_count': int_or_none(attrs.get('video_dislikes_count')),
+            'average_rating': float_or_none(attrs.get('video_rating')),
+        }