[rds] extract 9c9media formats

8 years ago · 20361b4f25
parent 05a0068a76
commit 20361b4f25
1 changed file with 12 additions and 15 deletions
--- a/youtube_dl/extractor/rds.py
+++ b/youtube_dl/extractor/rds.py
@@ -1,23 +1,23 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
 from ..utils import (
    parse_duration,
    parse_iso8601,
+    js_to_json,
 )
+from ..compat import compat_str


 class RDSIE(InfoExtractor):
    IE_DESC = 'RDS.ca'
-    _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<display_id>[^/]+)-(?P<id>\d+\.\d+)'
+    _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+'

    _TESTS = [{
        'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
        'info_dict': {
-            'id': '3.1132799',
+            'id': '604333',
            'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
            'ext': 'mp4',
            'title': 'Fowler Jr. prend la direction de Jacksonville',
@@ -33,22 +33,17 @@ class RDSIE(InfoExtractor):
    }]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        display_id = mobj.group('display_id')
+        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

-        # TODO: extract f4m from 9c9media.com
-        video_url = self._search_regex(
-            r'<span[^>]+itemprop="contentURL"[^>]+content="([^"]+)"',
-            webpage, 'video url')
-
-        title = self._og_search_title(webpage) or self._html_search_meta(
+        item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json)
+        video_id = compat_str(item['id'])
+        title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta(
            'title', webpage, 'title', fatal=True)
        description = self._og_search_description(webpage) or self._html_search_meta(
            'description', webpage, 'description')
-        thumbnail = self._og_search_thumbnail(webpage) or self._search_regex(
+        thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex(
            [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"',
             r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'],
            webpage, 'thumbnail', fatal=False)
@@ -61,13 +56,15 @@ class RDSIE(InfoExtractor):
        age_limit = self._family_friendly_search(webpage)

        return {
+            '_type': 'url_transparent',
            'id': video_id,
            'display_id': display_id,
-            'url': video_url,
+            'url': '9c9media:rds_web:%s' % video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'age_limit': age_limit,
+            'ie_key': 'NineCNineMedia',
        }