From 25ac63ed71bdc2a82842a593db9a150a0b8b7a6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Mon, 23 Feb 2015 21:52:07 +0100
Subject: [PATCH] [rtve] Extract subtitles

---
 test/test_subtitles.py       | 15 +++++++++++++++
 youtube_dl/extractor/rtve.py | 12 ++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index 7f93f0a75..3f2d8a2ba 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -25,6 +25,7 @@ from youtube_dl.extractor import (
     RaiIE,
     VikiIE,
     ThePlatformIE,
+    RTVEALaCartaIE,
 )
 
 
@@ -305,5 +306,19 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
         self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
 
 
+class TestRtveSubtitles(BaseTestSubtitles):
+    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
+    IE = RTVEALaCartaIE
+
+    def test_allsubtitles(self):
+        print('Skipping, only available from Spain')
+        return
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['es']))
+        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py
index e60f85b5b..27cd34b7d 100644
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@@ -102,14 +102,26 @@ class RTVEALaCartaIE(InfoExtractor):
         video_url = compat_urlparse.urljoin(
             'http://mvod1.akcdn.rtve.es/', video_path)
 
+        subtitles = None
+        if info.get('sbtFile') is not None:
+            subtitles = self.extract_subtitles(video_id, info['sbtFile'])
+
         return {
             'id': video_id,
             'title': info['title'],
             'url': video_url,
             'thumbnail': info.get('image'),
             'page_url': url,
+            'subtitles': subtitles,
         }
 
+    def _get_subtitles(self, video_id, sub_file):
+        subs = self._download_json(
+            sub_file + '.json', video_id,
+            'Downloading subtitles info')['page']['items']
+        return dict((s['lang'], [{'ext': 'vtt', 'url': s['src']}])
+                    for s in subs)
+
 
 class RTVELiveIE(InfoExtractor):
     IE_NAME = 'rtve.es:live'