From ae6423d7042de863c303fcbdc1646ea370233741 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 13 Feb 2015 11:36:33 +0100 Subject: [PATCH 01/20] [bambuser] Fix 'uploader_id' extraction (fixes #4944) --- youtube_dl/extractor/bambuser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index 98e1443ab..c193e66ca 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -50,7 +50,7 @@ class BambuserIE(InfoExtractor): 'duration': int(info['length']), 'view_count': int(info['views_total']), 'uploader': info['username'], - 'uploader_id': info['uid'], + 'uploader_id': info['owner']['uid'], } From 36e7a4ca2e85be8dc16955e28f9c563e01aa9eca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 13 Feb 2015 14:43:50 +0100 Subject: [PATCH 02/20] [test/subtitles] Update checksums --- test/test_subtitles.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 6336dd317..3e329438f 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -247,7 +247,7 @@ class TestVimeoSubtitles(BaseTestSubtitles): def test_subtitles(self): self.DL.params['writesubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4') + self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888') def test_subtitles_lang(self): self.DL.params['writesubtitles'] = True @@ -334,7 +334,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles): self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), set(['cs'])) - self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4') + self.assertEqual(md5(subtitles['cs']), 'cc3957b2c6dff1db71e5f2e83d467480') def test_nosubtitles(self): 
self.DL.expect_warning('video doesn\'t have subtitles') From 037e9437e435f32d287354c77f6586d7dc34544b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 Feb 2015 20:10:42 +0600 Subject: [PATCH 03/20] [camdemy] Fix _VALID_URL --- youtube_dl/extractor/camdemy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/camdemy.py b/youtube_dl/extractor/camdemy.py index 5de5879b4..897f3a104 100644 --- a/youtube_dl/extractor/camdemy.py +++ b/youtube_dl/extractor/camdemy.py @@ -16,7 +16,7 @@ from ..utils import ( class CamdemyIE(InfoExtractor): - _VALID_URL = r'http://www.camdemy.com/media/(?P<id>\d+)' + _VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)' _TESTS = [{ # single file 'url': 'http://www.camdemy.com/media/5181/', From 9347fddbfc81114f85fecc72e8452813ef130fdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Feb 2015 02:04:28 +0600 Subject: [PATCH 04/20] [1tv] Cover arbitrary URLs --- youtube_dl/extractor/firsttv.py | 57 ++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 08ceee4ed..6e015ca16 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -8,45 +8,66 @@ from ..utils import int_or_none class FirstTVIE(InfoExtractor): - IE_NAME = 'firsttv' - IE_DESC = 'Видеоархив - Первый канал' - _VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)' + IE_NAME = '1tv' + IE_DESC = 'Первый канал' + _VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.1tv.ru/videoarchive/73390', - 'md5': '3de6390cf0cca4a5eae1d1d83895e5ad', + 'md5': '777f525feeec4806130f4f764bc18a4f', 'info_dict': { 'id': '73390', 'ext': 'mp4', 'title': 'Олимпийские канатные дороги', - 'description': 'md5:cc730d2bf4215463e37fff6a1e277b13', - 'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG', + 
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', 'duration': 149, + 'like_count': int, + 'dislike_count': int, }, 'skip': 'Only works from Russia', - } + }, { + 'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930', + 'md5': 'a1b6b60d530ebcf8daacf4565762bbaf', + 'info_dict': { + 'id': '35930', + 'ext': 'mp4', + 'title': 'Наедине со всеми. Людмила Сенчина', + 'description': 'md5:89553aed1d641416001fe8d450f06cb9', + 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', + 'duration': 2694, + }, + 'skip': 'Only works from Russia', + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id, 'Downloading page') video_url = self._html_search_regex( - r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL') + r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''', + webpage, 'video URL') title = self._html_search_regex( - r'
\s*

([^<]*)', webpage, 'title') + [r'
\s*

([^<]*)', + r"'title'\s*:\s*'([^']+)'"], webpage, 'title') description = self._html_search_regex( - r'
\s*
 
\s*

([^<]*)

', webpage, 'description', fatal=False) + r'
\s*
 
\s*

([^<]*)

', + webpage, 'description', default=None) or self._html_search_meta( + 'description', webpage, 'description') thumbnail = self._og_search_thumbnail(webpage) - duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False) + duration = self._og_search_property( + 'video:duration', webpage, + 'video duration', fatal=False) - like_count = self._html_search_regex(r'title="Понравилось".*?/> \[(\d+)\]', - webpage, 'like count', fatal=False) - dislike_count = self._html_search_regex(r'title="Не понравилось".*?/> \[(\d+)\]', - webpage, 'dislike count', fatal=False) + like_count = self._html_search_regex( + r'title="Понравилось".*?/> \[(\d+)\]', + webpage, 'like count', default=None) + dislike_count = self._html_search_regex( + r'title="Не понравилось".*?/> \[(\d+)\]', + webpage, 'dislike count', default=None) return { 'id': video_id, From cffcbc02de504d84e1c2677bb525c00b03e04f53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 13 Feb 2015 22:25:34 +0100 Subject: [PATCH 05/20] [postprocessor/ffmpeg] Don't let ffmpeg read from stdin (fixes #4945) If you run 'while read aurl ; do youtube-dl --extract-audio "${aurl}"; done < path_to_batch_file' (batch_file contains one url per line) each call to youtube-dl consumed some characters and 'read' would assign to 'aurl' an invalid url, something like 'tube.com/watch?v='. 
--- youtube_dl/postprocessor/ffmpeg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 01d25f760..5238ce534 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -95,7 +95,7 @@ class FFmpegPostProcessor(PostProcessor): if self._downloader.params.get('verbose', False): self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd)) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) stdout, stderr = p.communicate() if p.returncode != 0: stderr = stderr.decode('utf-8', 'replace') @@ -134,7 +134,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): encodeFilename(self._probe_executable, True), encodeArgument('-show_streams'), encodeFilename(self._ffmpeg_filename_argument(path), True)] - handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE) + handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE) output = handle.communicate()[0] if handle.wait() != 0: return None From d9aa2b784d914ae99c7d9bbaf83d06f1b9dbd04e Mon Sep 17 00:00:00 2001 From: Ryan Schmidt Date: Sat, 14 Feb 2015 04:10:23 -0600 Subject: [PATCH 06/20] Support NBC Nightly News broadcasts --- youtube_dl/extractor/nbc.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index f840f6532..3e3de9e2d 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -54,7 +54,7 @@ class NBCIE(InfoExtractor): class NBCNewsIE(InfoExtractor): _VALID_URL = r'''(?x)https?://www\.nbcnews\.com/ ((video/.+?/(?P\d+))| - (feature/[^/]+/(?P.+))) + ((?P<program>feature|nightly-news)/[^/]+/(?P<title>.+))) ''' _TESTS = [ @@ -89,6 +89,16 @@ 
class NBCNewsIE(InfoExtractor): 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04', }, }, + { + 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844', + 'md5': 'b5dda8cddd8650baa0dcb616dd2cf60d', + 'info_dict': { + 'id': 'sekXqyTVnmN3', + 'ext': 'mp4', + 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)', + 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5', + }, + }, ] def _real_extract(self, url): @@ -107,12 +117,19 @@ class NBCNewsIE(InfoExtractor): 'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text, } else: - # "feature" pages use theplatform.com + # "feature" and "nightly-news" pages use theplatform.com title = mobj.group('title') webpage = self._download_webpage(url, title) - bootstrap_json = self._search_regex( - r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json', - flags=re.MULTILINE) + program = mobj.group('program') + if program == 'feature': + bootstrap_json = self._search_regex( + r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json', + flags=re.MULTILINE) + else: + # nightly-news + bootstrap_json = self._search_regex( + r'var playlistData = ({.+});\s*$', webpage, 'playlist data', + flags=re.MULTILINE) bootstrap = json.loads(bootstrap_json) info = bootstrap['results'][0]['video'] mpxid = info['mpxId'] From a4f3d779db13ec3c8bda67e897de8cd849a7f811 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 Feb 2015 17:42:12 +0600 Subject: [PATCH 07/20] [nbcnews] Simplify --- youtube_dl/extractor/nbc.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 3e3de9e2d..52e8595ea 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -52,9 +52,9 @@ class NBCIE(InfoExtractor): class NBCNewsIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://www\.nbcnews\.com/ - 
((video/.+?/(?P<id>\d+))| - ((?P<program>feature|nightly-news)/[^/]+/(?P<title>.+))) + _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/ + (?:video/.+?/(?P<id>\d+)| + (?:feature|nightly-news)/[^/]+/(?P<title>.+)) ''' _TESTS = [ @@ -120,17 +120,10 @@ class NBCNewsIE(InfoExtractor): # "feature" and "nightly-news" pages use theplatform.com title = mobj.group('title') webpage = self._download_webpage(url, title) - program = mobj.group('program') - if program == 'feature': - bootstrap_json = self._search_regex( - r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json', - flags=re.MULTILINE) - else: - # nightly-news - bootstrap_json = self._search_regex( - r'var playlistData = ({.+});\s*$', webpage, 'playlist data', - flags=re.MULTILINE) - bootstrap = json.loads(bootstrap_json) + bootstrap_json = self._search_regex( + r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$', + webpage, 'bootstrap json', flags=re.MULTILINE) + bootstrap = self._parse_json(bootstrap_json, video_id) info = bootstrap['results'][0]['video'] mpxid = info['mpxId'] From 3215c50f25bcf01a037e066747d7d60bd274ae71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 Feb 2015 17:44:24 +0600 Subject: [PATCH 08/20] Credit @ryandesign for nbcnews nightly news (#4948) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 3d6985ab6..47f12a9ee 100644 --- a/AUTHORS +++ b/AUTHORS @@ -110,3 +110,4 @@ Shaya Goldberg Paul Hartmann Frans de Jonge Robin de Rooij +Ryan Schmidt From ae1580d790ecca2b6ad132da26bc834e4f9873f3 Mon Sep 17 00:00:00 2001 From: peugeot <peugeot.com> Date: Sat, 14 Feb 2015 13:29:44 +0100 Subject: [PATCH 09/20] [sunporno] fix extraction --- youtube_dl/extractor/sunporno.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py index 8a333f1d2..f2a1d6550 100644 --- a/youtube_dl/extractor/sunporno.py +++ 
b/youtube_dl/extractor/sunporno.py @@ -52,7 +52,7 @@ class SunPornoIE(InfoExtractor): formats = [] quality = qualities(['mp4', 'flv']) - for video_url in re.findall(r'<source src="([^"]+)"', webpage): + for video_url in re.findall(r'<video src="([^"]+)"', webpage): video_ext = determine_ext(video_url) formats.append({ 'url': video_url, From 68f2d273bfeb7aa10eba68b6c62cc0502a948c72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 Feb 2015 18:33:52 +0600 Subject: [PATCH 10/20] [sunporno] Keep old video regex just in case --- youtube_dl/extractor/sunporno.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py index f2a1d6550..854d01bee 100644 --- a/youtube_dl/extractor/sunporno.py +++ b/youtube_dl/extractor/sunporno.py @@ -52,7 +52,7 @@ class SunPornoIE(InfoExtractor): formats = [] quality = qualities(['mp4', 'flv']) - for video_url in re.findall(r'<video src="([^"]+)"', webpage): + for video_url in re.findall(r'<(?:source|video) src="([^"]+)"', webpage): video_ext = determine_ext(video_url) formats.append({ 'url': video_url, From 10e3c4c2215a7d65391d23efd19aab96197cdcf8 Mon Sep 17 00:00:00 2001 From: peugeot <peugeot.com> Date: Sat, 14 Feb 2015 13:40:35 +0100 Subject: [PATCH 11/20] [drtuber] fix extraction --- youtube_dl/extractor/drtuber.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index ca274dff6..2f06e64cc 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -36,7 +36,7 @@ class DrTuberIE(InfoExtractor): r'<source src="([^"]+)"', webpage, 'video URL') title = self._html_search_regex( - r'<title>([^<]+)\s*-\s*Free', webpage, 'title') + r'<title>([^<]+) - \d+', webpage, 'title') thumbnail = self._html_search_regex( r'poster="([^"]+)"', From 52e1d0ccc415377bd9f7d862f645c87909705ace Mon Sep 17 00:00:00 2001 From: peugeot 
<peugeot.com> Date: Sat, 14 Feb 2015 13:42:42 +0100 Subject: [PATCH 12/20] [beeg] fix test --- youtube_dl/extractor/beeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index 4e79fea8f..b38057f2f 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -9,7 +9,7 @@ class BeegIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)' _TEST = { 'url': 'http://beeg.com/5416503', - 'md5': '634526ae978711f6b748fe0dd6c11f57', + 'md5': '1bff67111adb785c51d1b42959ec10e5', 'info_dict': { 'id': '5416503', 'ext': 'mp4', From 76d1466b08d4e3cebc40a7fb7dae957c3b34b1eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 Feb 2015 18:50:13 +0600 Subject: [PATCH 13/20] [drtuber] Add one more title regex --- youtube_dl/extractor/drtuber.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index 2f06e64cc..37c5c181f 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -15,7 +15,7 @@ class DrTuberIE(InfoExtractor): 'id': '1740434', 'display_id': 'hot-perky-blonde-naked-golf', 'ext': 'mp4', - 'title': 'Hot Perky Blonde Naked Golf', + 'title': 'hot perky blonde naked golf', 'like_count': int, 'dislike_count': int, 'comment_count': int, @@ -36,7 +36,8 @@ class DrTuberIE(InfoExtractor): r'<source src="([^"]+)"', webpage, 'video URL') title = self._html_search_regex( - r'<title>([^<]+) - \d+', webpage, 'title') + [r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'], + webpage, 'title') thumbnail = self._html_search_regex( r'poster="([^"]+)"', From a294bce82f84482091fa6f44ce913a7a86fe8be8 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Sat, 14 Feb 2015 17:48:04 +0200 Subject: [PATCH 14/20] [streamcz] Fix extraction (Closes #4940) --- 
youtube_dl/extractor/streamcz.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py index c3ceb5f76..e92b93285 100644 --- a/youtube_dl/extractor/streamcz.py +++ b/youtube_dl/extractor/streamcz.py @@ -1,14 +1,30 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals +import hashlib +import time + from .common import InfoExtractor +from ..compat import ( + compat_urllib_request, +) from ..utils import ( int_or_none, ) +def _get_api_key(api_path): + if api_path.endswith('?'): + api_path = api_path[:-1] + + api_key = 'fb5f58a820353bd7095de526253c14fd' + a = '{0:}{1:}{2:}'.format(api_key, api_path, int(round(time.time() / 24 / 3600))) + return hashlib.md5(a.encode('ascii')).hexdigest() + + class StreamCZIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<id>[0-9]+)' + _API_URL = 'http://www.stream.cz/API' _TESTS = [{ 'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti', @@ -36,8 +52,11 @@ class StreamCZIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - data = self._download_json( - 'http://www.stream.cz/API/episode/%s' % video_id, video_id) + api_path = '/episode/%s' % video_id + + req = compat_urllib_request.Request(self._API_URL + api_path) + req.add_header('Api-Password', _get_api_key(api_path)) + data = self._download_json(req, video_id) formats = [] for quality, video in enumerate(data['video_qualities']): From b0ab0fac49057ea0df3db75fb407af92deaaa453 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 14 Feb 2015 22:18:09 +0100 Subject: [PATCH 15/20] Remove unused imports --- youtube_dl/extractor/firsttv.py | 2 -- youtube_dl/extractor/nbc.py | 1 - 2 files changed, 3 deletions(-) diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 6e015ca16..510d4b108 100644 --- 
a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -1,8 +1,6 @@ # encoding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import int_or_none diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 52e8595ea..89a2845fe 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..compat import ( From 6ca7732d5e792f02be030d4a9c9d101fff2f4079 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 14 Feb 2015 22:20:24 +0100 Subject: [PATCH 16/20] [extractor/common] Fix link to external documentation --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 48742189a..c784eedb9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -665,7 +665,7 @@ class InfoExtractor(object): return RATING_TABLE.get(rating.lower(), None) def _family_friendly_search(self, html): - # See http://schema.org/VideoObj + # See http://schema.org/VideoObject family_friendly = self._html_search_meta('isFamilyFriendly', html) if not family_friendly: From 9fb2f1cd6d14dafca7a2d6cd74b0394b0f24afae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 15 Feb 2015 04:56:12 +0600 Subject: [PATCH 17/20] [theplatform] Add URL sign capability --- youtube_dl/extractor/theplatform.py | 42 ++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 110ed976d..1579822f2 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -2,6 +2,11 @@ from __future__ import 
unicode_literals import re import json +import time +import hmac +import binascii +import hashlib + from .subtitles import SubtitlesInfoExtractor from ..compat import ( @@ -11,6 +16,7 @@ from ..utils import ( determine_ext, ExtractorError, xpath_with_ns, + unsmuggle_url, ) _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'}) @@ -18,7 +24,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language class ThePlatformIE(SubtitlesInfoExtractor): _VALID_URL = r'''(?x) - (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/ + (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/ (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)? |theplatform:)(?P<id>[^/\?&]+)''' @@ -38,9 +44,33 @@ class ThePlatformIE(SubtitlesInfoExtractor): }, } + @staticmethod + def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False): + flags = '10' if include_qs else '00' + expiration_date = '%x' % (int(time.time()) + life) + + def str_to_hex(str): + return binascii.b2a_hex(str.encode('ascii')).decode('ascii') + + def hex_to_str(hex): + return binascii.a2b_hex(hex) + + relative_path = url.split('http://link.theplatform.com/s/')[1].split('?')[0] + clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path)) + checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest() + sig = flags + expiration_date + checksum + str_to_hex(sig_secret) + return '%s&sig=%s' % (url, sig) + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + mobj = re.match(self._VALID_URL, url) + provider_id = mobj.group('provider_id') video_id = mobj.group('id') + + if not provider_id: + provider_id = 'dJ5BDC' + if mobj.group('config'): config_url = url + '&form=json' config_url = config_url.replace('swf/', 'config/') @@ -48,8 +78,12 @@ class ThePlatformIE(SubtitlesInfoExtractor): config = self._download_json(config_url, video_id, 'Downloading config') smil_url = 
config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m' else: - smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?' - 'format=smil&mbr=true'.format(video_id)) + smil_url = ('http://link.theplatform.com/s/{0}/{1}/meta.smil?' + 'format=smil&mbr=true'.format(provider_id, video_id)) + + sig = smuggled_data.get('sig') + if sig: + smil_url = self._sign_url(smil_url, sig['key'], sig['secret']) meta = self._download_xml(smil_url, video_id) try: @@ -62,7 +96,7 @@ class ThePlatformIE(SubtitlesInfoExtractor): else: raise ExtractorError(error_msg, expected=True) - info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id) + info_url = 'http://link.theplatform.com/s/{0}/{1}?format=preview'.format(provider_id, video_id) info_json = self._download_webpage(info_url, video_id) info = json.loads(info_json) From b9c7a97318f4ca68cacf0ff5395c0d4a6f74b525 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 15 Feb 2015 04:57:52 +0600 Subject: [PATCH 18/20] [history] Add extractor (Closes #4934) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/history.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 youtube_dl/extractor/history.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a4fab540b..13292073c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -189,6 +189,7 @@ from .hellporno import HellPornoIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE from .historicfilms import HistoricFilmsIE +from .history import HistoryIE from .hitbox import HitboxIE, HitboxLiveIE from .hornbunny import HornBunnyIE from .hostingbulk import HostingBulkIE diff --git a/youtube_dl/extractor/history.py b/youtube_dl/extractor/history.py new file mode 100644 index 000000000..f86164afe --- /dev/null +++ b/youtube_dl/extractor/history.py @@ -0,0 +1,31 @@ +from 
__future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import smuggle_url + + +class HistoryIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?history\.com/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])' + + _TESTS = [{ + 'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false', + 'md5': '6fe632d033c92aa10b8d4a9be047a7c5', + 'info_dict': { + 'id': 'bLx5Dv5Aka1G', + 'ext': 'mp4', + 'title': "Bet You Didn't Know: Valentine's Day", + 'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7', + }, + 'add_ie': ['ThePlatform'], + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + video_url = self._search_regex( + r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id, + webpage, 'video url') + + return self.url_result(smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}})) From f813928e4b669627007b772c9b150eed135b18be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 15 Feb 2015 16:32:38 +0600 Subject: [PATCH 19/20] [bbccouk] Fix fallback to legacy playlist --- youtube_dl/extractor/bbccouk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py index 126c8824c..f23e39545 100644 --- a/youtube_dl/extractor/bbccouk.py +++ b/youtube_dl/extractor/bbccouk.py @@ -273,7 +273,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor): formats, subtitles = self._download_media_selector(programme_id) return programme_id, title, description, duration, formats, subtitles except ExtractorError as ee: - if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: + if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404): raise # fallback to legacy playlist From 8fb474fb17a64ff2aa9f6315ebbc99ae7938c4e1 Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 15 Feb 2015 14:59:00 +0100 Subject: [PATCH 20/20] [test/subtitles] Fix some tests The checksum for the CeskaTelevize subtitles has changed again, so we just test that it has a reasonable length. --- test/test_subtitles.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 3e329438f..bcc69a778 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -138,7 +138,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(len(subtitles.keys()), 5) + self.assertTrue(len(subtitles.keys()) >= 6) def test_list_subtitles(self): self.DL.expect_warning('Automatic Captions not supported by this server') @@ -334,7 +334,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles): self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), set(['cs'])) - self.assertEqual(md5(subtitles['cs']), 'cc3957b2c6dff1db71e5f2e83d467480') + self.assertTrue(len(subtitles['cs']) > 20000) def test_nosubtitles(self): self.DL.expect_warning('video doesn\'t have subtitles')