From 91a6addeeb7ded7edc3f01cd59983bb2e219f047 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 7 Apr 2014 16:56:15 +0200 Subject: [PATCH] Add support for rtve.es/alacarta --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/rtve.py | 84 ++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 youtube_dl/extractor/rtve.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index eec8bff16..3a91e1a46 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -208,6 +208,7 @@ from .rottentomatoes import RottenTomatoesIE from .roxwel import RoxwelIE from .rtlnow import RTLnowIE from .rts import RTSIE +from .rtve import RTVEALaCartaIE from .rutube import ( RutubeIE, RutubeChannelIE, diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py new file mode 100644 index 000000000..77fd08dde --- /dev/null +++ b/youtube_dl/extractor/rtve.py @@ -0,0 +1,84 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re +import base64 + +from .common import InfoExtractor +from ..utils import ( + struct_unpack, +) + + +class RTVEALaCartaIE(InfoExtractor): + IE_NAME = 'rtve.es:alacarta' + IE_DESC = 'RTVE a la carta' + _VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P\d+)' + + _TEST = { + 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', + 'md5': '18fcd45965bdd076efdb12cd7f6d7b9e', + 'info_dict': { + 'id': '2491869', + 'ext': 'mp4', + 'title': 'Balonmano - Swiss Cup masculina. Final: EspaƱa-Suecia', + }, + } + + def _decrypt_url(self, png): + encrypted_data = base64.b64decode(png) + text_index = encrypted_data.find(b'tEXt') + text_chunk = encrypted_data[text_index-4:] + length = struct_unpack('!I', text_chunk[:4])[0] + # Use bytearray to get integers when iterating in both python 2.x and 3.x + data = bytearray(text_chunk[8:8+length]) + data = [chr(b) for b in data if b != 0] + hash_index = data.index('#') + alphabet_data = data[:hash_index] + url_data = data[hash_index+1:] + + alphabet = [] + e = 0 + d = 0 + for l in alphabet_data: + if d == 0: + alphabet.append(l) + d = e = (e + 1) % 4 + else: + d -= 1 + url = '' + f = 0 + e = 3 + b = 1 + for letter in url_data: + if f == 0: + l = int(letter)*10 + f = 1 + else: + if e == 0: + l += int(letter) + url += alphabet[l] + e = (b + 3) % 4 + f = 0 + b += 1 + else: + e -= 1 + + return url + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + info = self._download_json( + 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id, + video_id)['page']['items'][0] + png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id + png = self._download_webpage(png_url, video_id, 'Downloading url information') + video_url = self._decrypt_url(png) + + return { + 'id': video_id, + 'title': info['title'], + 'url': video_url, + 'thumbnail': info['image'], + }