From 622638512b8241c39837b634e75c44cf9105a299 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 4 Sep 2016 16:25:59 +0800 Subject: [PATCH] [rottentomatoes] Fix extraction Closes #10467 --- ChangeLog | 1 + youtube_dl/extractor/rottentomatoes.py | 30 +++++++++++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2809e55d7..e6a2d24e1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [rottentomatoes] Fix extraction (#10467) * [youjizz] Fix extraction (#10437) + [foxnews] Add support for FoxNews Insider (#10445) + [fc2] Recognize Flash player URLs (#10512) diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py index f9cd48790..df39ed3f2 100644 --- a/youtube_dl/extractor/rottentomatoes.py +++ b/youtube_dl/extractor/rottentomatoes.py @@ -1,8 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urlparse -from .internetvideoarchive import InternetVideoArchiveIE +from ..utils import js_to_json class RottenTomatoesIE(InfoExtractor): @@ -11,21 +10,36 @@ class RottenTomatoesIE(InfoExtractor): _TEST = { 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', 'info_dict': { - 'id': '613340', + 'id': '11028566', 'ext': 'mp4', 'title': 'Toy Story 3', + 'thumbnail': 're:^https?://.*\.jpg$', }, } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - og_video = self._og_search_video_url(webpage) - query = compat_urlparse.urlparse(og_video).query + + params = self._parse_json( + self._search_regex(r'(?s)RTVideo\(({.+?})\);', webpage, 'player parameters'), + video_id, transform_source=lambda s: js_to_json(s.replace('window.location.href', '""'))) + + formats = [] + if params.get('urlHLS'): + formats.extend(self._extract_m3u8_formats( + params['urlHLS'], video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + if params.get('urlMP4'): + formats.append({ + 'url': params['urlMP4'], + 'format_id': 'mp4', + }) + self._sort_formats(formats) return { - '_type': 'url_transparent', - 'url': InternetVideoArchiveIE._build_xml_url(query), - 'ie_key': InternetVideoArchiveIE.ie_key(), + 'id': video_id, 'title': self._og_search_title(webpage), + 'formats': formats, + 'thumbnail': params.get('thumbnailImg'), }