From a0758dfa1afd5b04773ba3b3b17ac71d22054821 Mon Sep 17 00:00:00 2001 From: felix Date: Wed, 5 Aug 2015 22:40:46 +0200 Subject: [PATCH 0001/1696] [filmon] new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/filmon.py | 144 +++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+) create mode 100644 youtube_dl/extractor/filmon.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 578359a5e..c9b9ebd23 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -287,6 +287,7 @@ from .fc2 import ( FC2EmbedIE, ) from .fczenit import FczenitIE +from .filmon import FilmOnIE, FilmOnVODIE from .firstpost import FirstpostIE from .firsttv import FirstTVIE from .fivemin import FiveMinIE diff --git a/youtube_dl/extractor/filmon.py b/youtube_dl/extractor/filmon.py new file mode 100644 index 000000000..987792fec --- /dev/null +++ b/youtube_dl/extractor/filmon.py @@ -0,0 +1,144 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import qualities +from ..compat import compat_urllib_request + + +_QUALITY = qualities(('low', 'high')) + + +class FilmOnIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?filmon\.com/(?:tv|channel)/(?P[a-z0-9-]+)' + _TESTS = [{ + 'url': 'https://www.filmon.com/channel/filmon-sports', + 'only_matching': True, + }, { + 'url': 'https://www.filmon.com/tv/2894', + 'only_matching': True, + }] + + def _real_extract(self, url): + channel_id = self._match_id(url) + + request = compat_urllib_request.Request('https://www.filmon.com/channel/%s' % (channel_id)) + request.add_header('X-Requested-With', 'XMLHttpRequest') + channel_info = self._download_json(request, channel_id) + now_playing = channel_info['now_playing'] + + thumbnails = [] + for thumb in now_playing.get('images', ()): + if thumb['type'] != '2': + continue + thumbnails.append({ + 'url': thumb['url'], + 'width': int(thumb['width']), + 'height': int(thumb['height']), + }) + + formats = [] + + for stream in channel_info['streams']: + formats.append({ + 'format_id': str(stream['id']), + # this is an m3u8 stream, but we are deliberately not using _extract_m3u8_formats + # because 0) it doesn't have bitrate variants anyway, and 1) the ids generated + # by that method are highly unstable (because the bitrate is variable) + 'url': stream['url'], + 'resolution': stream['name'], + 'format_note': 'expires after %u seconds' % int(stream['watch-timeout']), + 'ext': 'mp4', + 'quality': _QUALITY(stream['quality']), + 'preference': int(stream['watch-timeout']), + }) + self._sort_formats(formats) + + return { + 'id': str(channel_info['id']), + 'display_id': channel_info['alias'], + 'formats': formats, + # XXX: use the channel description (channel_info['description'])? + 'uploader_id': channel_info['alias'], + 'uploader': channel_info['title'], # XXX: kinda stretching it... 
+ 'title': now_playing.get('programme_name') or channel_info['title'], + 'description': now_playing.get('programme_description'), + 'thumbnails': thumbnails, + 'is_live': True, + } + + +class FilmOnVODIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?filmon\.com/vod/view/(?P\d+)' + _TESTS = [{ + 'url': 'https://www.filmon.com/vod/view/24869-0-plan-9-from-outer-space', + 'info_dict': { + 'id': '24869', + 'ext': 'mp4', + 'title': 'Plan 9 From Outer Space', + 'description': 'Dead human, zombies and vampires', + }, + }, { + 'url': 'https://www.filmon.com/vod/view/2825-1-popeye-series-1', + 'info_dict': { + 'id': '2825', + 'title': 'Popeye Series 1', + }, + 'playlist_count': 8, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + result = self._download_json('https://www.filmon.com/api/vod/movie?id=%s' % (video_id), video_id) + if result['code'] != 200: + raise ExtractorError('FilmOn said: %s' % (result['reason']), expected=True) + + response = result['response'] + + if response.get('episodes'): + return { + '_type': 'playlist', + 'id': video_id, + 'title': response['title'], + 'entries': [{ + '_type': 'url', + 'url': 'https://www.filmon.com/vod/view/%s' % (ep), + } for ep in response['episodes']] + } + + formats = [] + for (id, stream) in response['streams'].items(): + formats.append({ + 'format_id': id, + 'url': stream['url'], + 'resolution': stream['name'], + 'format_note': 'expires after %u seconds' % int(stream['watch-timeout']), + 'ext': 'mp4', + 'quality': _QUALITY(stream['quality']), + 'preference': int(stream['watch-timeout']), + }) + self._sort_formats(formats) + + poster = response['poster'] + thumbnails = [{ + 'id': 'poster', + 'url': poster['url'], + 'width': poster['width'], + 'height': poster['height'], + }] + for (id, thumb) in poster['thumbs'].items(): + thumbnails.append({ + 'id': id, + 'url': thumb['url'], + 'width': thumb['width'], + 'height': thumb['height'], + }) + + return { + 'id': video_id, + 'title': response['title'], + 'formats': formats, + 'description': response['description'], + 'thumbnails': thumbnails, + } From ed06da4e7b274fd444a6ada23ba9bb4c559761d3 Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Thu, 5 Jan 2017 04:52:42 +0300 Subject: [PATCH 0002/1696] [freesound] Fix extraction and extended (closes #11602) --- youtube_dl/extractor/freesound.py | 55 ++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/freesound.py b/youtube_dl/extractor/freesound.py index 5ff62af2a..f0b2400cf 100644 --- a/youtube_dl/extractor/freesound.py +++ b/youtube_dl/extractor/freesound.py @@ -3,6 +3,15 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ( + determine_ext, + float_or_none, + get_element_by_class, + get_element_by_id, + int_or_none, + parse_filesize, + unified_strdate, +) class FreesoundIE(InfoExtractor): @@ -23,17 +32,53 @@ class FreesoundIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) music_id = mobj.group('id') webpage = self._download_webpage(url, music_id) - title = self._html_search_regex( - r'
.*?(.+?)',
-            webpage, 'music title', flags=re.DOTALL)
+
+        audio_url = self._og_search_property('audio', webpage, 'song url')
+        title = self._og_search_property('audio:title', webpage, 'song title')
+        duration = float_or_none(get_element_by_class('duration', webpage), scale=1000)
+        tags = get_element_by_class('tags', webpage)
+        sound_info = get_element_by_id('sound_information_box', webpage)
+        release_date = get_element_by_id('sound_date', webpage)
+
         description = self._html_search_regex(
             r'
(.*?)
', webpage, 'description', fatal=False, flags=re.DOTALL)
+        download_count = int_or_none(self._html_search_regex(
+            r'Downloaded.*>(\d+)<', webpage, 'downloaded', fatal=False))
+
+        filesize = float_or_none(parse_filesize(self._search_regex(
+            r'Filesize
(.*)
', sound_info, 'file size (approx)', fatal=False)))
+
+        if release_date:
+            release_date = unified_strdate(release_date.replace('th', ''))
+
+        bitdepth = self._html_search_regex(
+            r'Bitdepth
(.*)
', sound_info, 'Bitdepth', fatal=False)
+
+        channels = self._html_search_regex(
+            r'Channels
(.*)
', sound_info, 'Channels info', fatal=False)
+
+        formats = [{
+            'url': audio_url,
+            'id': music_id,
+            'format_id': self._og_search_property('audio:type', webpage, 'audio format', fatal=False),
+            'format_note': '{0} {1} {2}'.format(determine_ext(audio_url), bitdepth, channels),
+            'filesize_approx': filesize,
+            'asr': int_or_none(self._html_search_regex(
+                r'Samplerate
(\d+).*
', + sound_info, 'samplerate', fatal=False)), + }] + return { 'id': music_id, 'title': title, - 'url': self._og_search_property('audio', webpage, 'music url'), - 'uploader': self._og_search_property('audio:artist', webpage, 'music uploader'), + 'uploader': self._og_search_property('audio:artist', webpage, 'music uploader', fatal=False), 'description': description, + 'duration': duration, + 'tags': [self._html_search_regex(r'>(.*)', t, 'tag', fatal=False) + for t in tags.split('\n') if t.strip()], + 'formats': formats, + 'release_date': release_date, + 'likes_count': download_count, } From cb655f34fbbd741f18e22cb8ec0cae1c4c3bfebe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 12 Jan 2017 22:39:45 +0700 Subject: [PATCH 0003/1696] [utils] Add more date formats --- test/test_utils.py | 3 +++ youtube_dl/utils.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 3092db5c1..e99bf794e 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -295,6 +295,9 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227') self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None) self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207') + self.assertEqual(unified_strdate('July 15th, 2013'), '20130715') + self.assertEqual(unified_strdate('September 1st, 2013'), '20130901') + self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902') def test_unified_timestamps(self): self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 39dd6c49f..12863e74a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -128,7 +128,13 @@ DATE_FORMATS = ( '%d %B %Y', '%d %b %Y', '%B %d %Y', + '%B %dst %Y', + '%B %dnd %Y', + '%B %dth %Y', '%b %d %Y', + '%b %dst %Y', + '%b %dnd %Y', + '%b %dth %Y', '%b %dst %Y %I:%M', '%b %dnd %Y %I:%M', '%b %dth %Y %I:%M', From 3a407e707ac96bc082fd82325e916802a3b55d36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 12 Jan 2017 23:03:53 +0700 Subject: [PATCH 0004/1696] [freesound] Improve and remove unrelated metadata (closes #11608) --- youtube_dl/extractor/freesound.py | 73 ++++++++++++++----------------- 1 file changed, 34 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/freesound.py b/youtube_dl/extractor/freesound.py index f0b2400cf..138b6bc58 100644 --- a/youtube_dl/extractor/freesound.py +++ b/youtube_dl/extractor/freesound.py @@ -4,18 +4,15 @@ import re from .common import InfoExtractor from ..utils import ( - determine_ext, float_or_none, get_element_by_class, get_element_by_id, - int_or_none, - parse_filesize, unified_strdate, ) class FreesoundIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?freesound\.org/people/([^/]+)/sounds/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?freesound\.org/people/[^/]+/sounds/(?P[^/]+)' _TEST = { 'url': 'http://www.freesound.org/people/miklovan/sounds/194503/', 'md5': '12280ceb42c81f19a515c745eae07650', @@ -23,62 +20,60 @@ class FreesoundIE(InfoExtractor): 'id': '194503', 'ext': 'mp3', 'title': 'gulls in the city.wav', - 'uploader': 'miklovan', 'description': 'the sounds of seagulls in the city', + 'duration': 130.233, + 'uploader': 'miklovan', + 'upload_date': '20130715', + 'tags': list, } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - music_id = mobj.group('id') - webpage = self._download_webpage(url, music_id) + audio_id = self._match_id(url) + + webpage = 
self._download_webpage(url, audio_id) audio_url = self._og_search_property('audio', webpage, 'song url') title = self._og_search_property('audio:title', webpage, 'song title') - duration = float_or_none(get_element_by_class('duration', webpage), scale=1000) - tags = get_element_by_class('tags', webpage) - sound_info = get_element_by_id('sound_information_box', webpage) - release_date = get_element_by_id('sound_date', webpage) description = self._html_search_regex( - r'
(.*?)
', webpage, 'description', - fatal=False, flags=re.DOTALL) + r'(?s)id=["\']sound_description["\'][^>]*>(.+?)
', + webpage, 'description', fatal=False) - download_count = int_or_none(self._html_search_regex( - r'Downloaded.*>(\d+)<', webpage, 'downloaded', fatal=False)) + duration = float_or_none( + get_element_by_class('duration', webpage), scale=1000) - filesize = float_or_none(parse_filesize(self._search_regex( - r'Filesize
(.*)
', sound_info, 'file size (approx)', fatal=False))) + upload_date = unified_strdate(get_element_by_id('sound_date', webpage)) + uploader = self._og_search_property( + 'audio:artist', webpage, 'uploader', fatal=False) - if release_date: - release_date = unified_strdate(release_date.replace('th', '')) + channels = self._html_search_regex( + r'Channels
(.+?)
', webpage, + 'channels info', fatal=False) - bitdepth = self._html_search_regex( - r'Bitdepth
(.*)
', sound_info, 'Bitdepth', fatal=False) + tags_str = get_element_by_class('tags', webpage) + tags = re.findall(r']+>([^<]+)', tags_str) if tags_str else None - channels = self._html_search_regex( - r'Channels
(.*)
', sound_info, 'Channels info', fatal=False) + audio_urls = [audio_url] + + LQ_FORMAT = '-lq.mp3' + if LQ_FORMAT in audio_url: + audio_urls.append(audio_url.replace(LQ_FORMAT, '-hq.mp3')) formats = [{ - 'url': audio_url, - 'id': music_id, - 'format_id': self._og_search_property('audio:type', webpage, 'audio format', fatal=False), - 'format_note': '{0} {1} {2}'.format(determine_ext(audio_url), bitdepth, channels), - 'filesize_approx': filesize, - 'asr': int_or_none(self._html_search_regex( - r'Samplerate
(\d+).*
', - sound_info, 'samplerate', fatal=False)), - }] + 'url': format_url, + 'format_note': channels, + 'quality': quality, + } for quality, format_url in enumerate(audio_urls)] + self._sort_formats(formats) return { - 'id': music_id, + 'id': audio_id, 'title': title, - 'uploader': self._og_search_property('audio:artist', webpage, 'music uploader', fatal=False), 'description': description, 'duration': duration, - 'tags': [self._html_search_regex(r'>(.*)', t, 'tag', fatal=False) - for t in tags.split('\n') if t.strip()], + 'uploader': uploader, + 'upload_date': upload_date, + 'tags': tags, 'formats': formats, - 'release_date': release_date, - 'likes_count': download_count, } From c4251b9aaa9a69e7f7b55197b3907e52b17150d4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 13 Jan 2017 10:08:51 +0100 Subject: [PATCH 0005/1696] [common] add possibility to customize akamai manifest host --- youtube_dl/extractor/common.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6fa7c334e..dce8c7d0d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1967,10 +1967,13 @@ class InfoExtractor(object): entries.append(media_info) return entries - def _extract_akamai_formats(self, manifest_url, video_id): + def _extract_akamai_formats(self, manifest_url, video_id, hosts={}): formats = [] hdcore_sign = 'hdcore=3.7.0' - f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m') + f4m_url = re.sub(r'(https?://[^/+])/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m') + hds_host = hosts.get('hds') + if hds_host: + f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url) if 'hdcore=' not in f4m_url: f4m_url += ('&' if '?' 
in f4m_url else '?') + hdcore_sign f4m_formats = self._extract_f4m_formats( @@ -1978,7 +1981,10 @@ class InfoExtractor(object): for entry in f4m_formats: entry.update({'extra_param_to_segment_url': hdcore_sign}) formats.extend(f4m_formats) - m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8') + m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8') + hls_host = hosts.get('hls') + if hls_host: + m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url) formats.extend(self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) From 1f393a324191591d895bafc1e4c756951f368b3c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 13 Jan 2017 10:19:53 +0100 Subject: [PATCH 0006/1696] [tv4] improve extraction(closes #11698) - remove check for requires_subscription - extract more formats - extract subtitles --- youtube_dl/extractor/tv4.py | 49 +++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index 29f62b970..ad79db92b 100644 --- a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -4,11 +4,10 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - ExtractorError, int_or_none, parse_iso8601, try_get, - update_url_query, + determine_ext, ) @@ -28,7 +27,7 @@ class TV4IE(InfoExtractor): _TESTS = [ { 'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650', - 'md5': '909d6454b87b10a25aa04c4bdd416a9b', + 'md5': 'cb837212f342d77cec06e6dad190e96d', 'info_dict': { 'id': '2491650', 'ext': 'mp4', @@ -40,7 +39,7 @@ class TV4IE(InfoExtractor): }, { 'url': 'http://www.tv4play.se/iframe/video/3054113', - 'md5': '77f851c55139ffe0ebd41b6a5552489b', + 'md5': 'cb837212f342d77cec06e6dad190e96d', 'info_dict': { 'id': '3054113', 'ext': 'mp4', @@ -75,11 +74,10 @@ class TV4IE(InfoExtractor): # If is_geo_restricted is true, it doesn't necessarily mean we can't download it if info.get('is_geo_restricted'): self.report_warning('This content might not be available in your country due to licensing restrictions.') - if info.get('requires_subscription'): - raise ExtractorError('This content requires subscription.', expected=True) title = info['title'] + subtitles = {} formats = [] # http formats are linked with unresolvable host for kind in ('hls', ''): @@ -87,26 +85,41 @@ class TV4IE(InfoExtractor): 'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id, video_id, 'Downloading sources JSON', query={ 'protocol': kind, - 'videoFormat': 'MP4+WEBVTTS+WEBVTT', + 'videoFormat': 'MP4+WEBVTT', }) - item = try_get(data, lambda x: x['playback']['items']['item'], dict) - manifest_url = item.get('url') - if not isinstance(manifest_url, compat_str): + items = try_get(data, lambda x: x['playback']['items']['item']) + if not items: continue - if kind == 'hls': - formats.extend(self._extract_m3u8_formats( - manifest_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id=kind, fatal=False)) - else: - formats.extend(self._extract_f4m_formats( - update_url_query(manifest_url, {'hdcore': '3.8.0'}), - video_id, f4m_id='hds', fatal=False)) + if isinstance(items, dict): + items = [items] + for item in items: + manifest_url = item.get('url') + if not isinstance(manifest_url, compat_str): + continue + ext = determine_ext(manifest_url) + if ext == 'm3u8': + 
formats.extend(self._extract_m3u8_formats( + manifest_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id=kind, fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_akamai_formats( + manifest_url, video_id, { + 'hls': 'tv4play-i.akamaihd.net', + })) + elif ext == 'webvtt': + subtitles = self._merge_subtitles( + subtitles, { + 'sv': [{ + 'url': manifest_url, + 'ext': 'vtt', + }]}) self._sort_formats(formats) return { 'id': video_id, 'title': title, 'formats': formats, + 'subtitles': subtitles, 'description': info.get('description'), 'timestamp': parse_iso8601(info.get('broadcast_date_time')), 'duration': int_or_none(info.get('duration')), From 06e9363b7a21acf6a592780a706b0fdd6b5a2d4e Mon Sep 17 00:00:00 2001 From: Vijay Singh Date: Sun, 8 Jan 2017 22:27:28 +0530 Subject: [PATCH 0007/1696] [openload] Fix extraction (closes #10408) Just a minor fix for openload --- youtube_dl/extractor/openload.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 2ce9f3826..3d4ad7dca 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -64,16 +64,17 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True) ol_id = self._search_regex( - ']+id="[a-zA-Z0-9]+x"[^>]*>([0-9]+)', + ']+id="[^"]+"[^>]*>([0-9]+)', webpage, 'openload ID') - first_two_chars = int(float(ol_id[0:][:2])) + first_three_chars = int(float(ol_id[0:][:3])) + fifth_char = int(float(ol_id[3:5])) urlcode = '' - num = 2 + num = 5 while num < len(ol_id): - urlcode += compat_chr(int(float(ol_id[num:][:3])) - - first_two_chars * int(float(ol_id[num + 3:][:2]))) + urlcode += compat_chr(int(float(ol_id[num:][:3])) + + first_three_chars - fifth_char * int(float(ol_id[num + 3:][:2]))) num += 5 video_url = 'https://openload.co/stream/' + urlcode From fb6a59205e3dc5bb1d37d50ac1161314c0d66cf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 Jan 2017 23:55:55 +0700 Subject: [PATCH 0008/1696] [mixcloud] Fix extraction (closes #11674) --- youtube_dl/extractor/mixcloud.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 4ba2310fd..a24b3165a 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -16,7 +16,6 @@ from ..utils import ( clean_html, ExtractorError, OnDemandPagedList, - parse_count, str_to_int, ) @@ -36,7 +35,6 @@ class MixcloudIE(InfoExtractor): 'uploader_id': 'dholbach', 'thumbnail': r're:https?://.*\.jpg', 'view_count': int, - 'like_count': int, }, }, { 'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/', @@ -49,7 +47,6 @@ class MixcloudIE(InfoExtractor): 'uploader_id': 'gillespeterson', 'thumbnail': 're:https?://.*', 'view_count': int, - 'like_count': int, }, }, { 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/', @@ -89,26 +86,18 @@ class MixcloudIE(InfoExtractor): song_url = play_info['stream_url'] - PREFIX = ( - r'm-play-on-spacebar[^>]+' - r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+') - title = self._html_search_regex( - PREFIX + r'm-title="([^"]+)"', webpage, 'title') + title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title') thumbnail = self._proto_relative_url(self._html_search_regex( - PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', - fatal=False)) + r'm-thumbnail-url="([^"]+)"', webpage, 
'thumbnail', fatal=False)) uploader = self._html_search_regex( - PREFIX + r'm-owner-name="([^"]+)"', - webpage, 'uploader', fatal=False) + r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False) uploader_id = self._search_regex( r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) description = self._og_search_description(webpage) - like_count = parse_count(self._search_regex( - r'\bbutton-favorite[^>]+>.*?]+class=["\']toggle-number[^>]+>\s*([^<]+)', - webpage, 'like count', default=None)) view_count = str_to_int(self._search_regex( [r'([0-9,.]+)'], + r'/listeners/?">([0-9,.]+)', + r'm-tooltip=["\']([\d,.]+) plays'], webpage, 'play count', default=None)) return { @@ -120,7 +109,6 @@ class MixcloudIE(InfoExtractor): 'uploader': uploader, 'uploader_id': uploader_id, 'view_count': view_count, - 'like_count': like_count, } From 9837cb7507e0635755082a7fd2e748c4106fefc4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 13 Jan 2017 23:02:50 +0100 Subject: [PATCH 0009/1696] [ooyala] add support for videos with embedToken(#11684) --- youtube_dl/extractor/generic.py | 9 ++++++++- youtube_dl/extractor/ooyala.py | 14 +++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 86dc79307..ac29ec600 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1939,7 +1939,14 @@ class GenericIE(InfoExtractor): re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P.{32})[\'"]\)', webpage) or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P.{32})[\'"]', webpage)) if mobj is not None: - return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url})) + embed_token = self._search_regex( + r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)', + webpage, 'ooyala embed token', default=None) + return OoyalaIE._build_url_result(smuggle_url( + mobj.group('ec'), { + 'domain': url, + 'embed_token': embed_token, + })) # Look for multiple Ooyala embeds on SBN network websites mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index c2807d0f6..f00cf745b 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -18,7 +18,7 @@ class OoyalaBaseIE(InfoExtractor): _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/' _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?' 
- def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None): + def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None, embed_token=None): content_tree = self._download_json(content_tree_url, video_id)['content_tree'] metadata = content_tree[list(content_tree)[0]] embed_code = metadata['embed_code'] @@ -29,7 +29,8 @@ class OoyalaBaseIE(InfoExtractor): self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + compat_urllib_parse_urlencode({ 'domain': domain, - 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds', + 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth', + 'embedToken': embed_token, }), video_id) cur_auth_data = auth_data['authorization_data'][embed_code] @@ -52,6 +53,12 @@ class OoyalaBaseIE(InfoExtractor): elif delivery_type == 'hds' or ext == 'f4m': formats.extend(self._extract_f4m_formats( s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) + elif delivery_type == 'hds' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + s_url, embed_code, mpd_id='dash', fatal=False)) + elif delivery_type == 'smooth': + self._extract_ism_formats( + s_url, embed_code, ism_id='mss', fatal=False) elif ext == 'smil': formats.extend(self._extract_smil_formats( s_url, embed_code, fatal=False)) @@ -146,8 +153,9 @@ class OoyalaIE(OoyalaBaseIE): embed_code = self._match_id(url) domain = smuggled_data.get('domain') supportedformats = smuggled_data.get('supportedformats') + embed_token = smuggled_data.get('embed_token') content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code) - return self._extract(content_tree_url, embed_code, domain, supportedformats) + return self._extract(content_tree_url, embed_code, domain, supportedformats, embed_token) class OoyalaExternalIE(OoyalaBaseIE): From 5e8eebb6009ac3e9f7dfc803d8561174d207c1a2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 13 Jan 2017 23:06:07 +0100 Subject: [PATCH 0010/1696] [mitele] extract dash formats --- youtube_dl/extractor/mitele.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 8984d3b8d..79e0b8ada 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -190,7 +190,7 @@ class MiTeleIE(InfoExtractor): return { '_type': 'url_transparent', # for some reason only HLS is supported - 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8'}), + 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8,dash'}), 'id': video_id, 'title': title, 'description': description, From adf063dad1792f0c9c680d13ccd984b4ad60ac29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 06:17:03 +0700 Subject: [PATCH 0011/1696] [mtv,cc,cmt,spike] Improve and refactor - Eliminate _transform_rtmp_url * Generalize triforce mgid extraction + [cmt] Add support for full-episodes (closes #11623) --- youtube_dl/extractor/cmt.py | 25 ++++++------ youtube_dl/extractor/comedycentral.py | 17 +------- youtube_dl/extractor/mtv.py | 58 ++++++++++++++++++--------- youtube_dl/extractor/spike.py | 2 +- 4 files changed, 54 insertions(+), 48 deletions(-) diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py index 7d3e9b0c9..6302b8d9c 100644 --- a/youtube_dl/extractor/cmt.py +++ b/youtube_dl/extractor/cmt.py @@ -1,13 +1,11 @@ from __future__ import unicode_literals from .mtv import MTVIE -from ..utils import ExtractorError class CMTIE(MTVIE): 
IE_NAME = 'cmt.com' - _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P\d+)' - _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/' + _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes)/(?P[^/]+)' _TESTS = [{ 'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', @@ -35,15 +33,16 @@ class CMTIE(MTVIE): 'only_matching': True, }] - @classmethod - def _transform_rtmp_url(cls, rtmp_video_url): - if 'error_not_available.swf' in rtmp_video_url: - raise ExtractorError( - '%s said: video is not available' % cls.IE_NAME, expected=True) - - return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url) - def _extract_mgid(self, webpage): - return self._search_regex( + mgid = self._search_regex( r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P.+?)\1', - webpage, 'mgid', group='mgid') + webpage, 'mgid', group='mgid', default=None) + if not mgid: + mgid = self._extract_triforce_mgid(webpage) + return mgid + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + mgid = self._extract_mgid(webpage) + return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 816e0bfb6..4cac29415 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -48,17 +48,8 @@ class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) - - feed_json = self._search_regex(r'var triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, 'triforce feeed') - feed = self._parse_json(feed_json, playlist_id) - zones = feed['manifest']['zones'] - - video_zone = zones['t2_lc_promo1'] - feed = self._download_json(video_zone['feed'], playlist_id) - mgid = feed['result']['data']['id'] - + mgid = self._extract_triforce_mgid(webpage, data_zone='t2_lc_promo1') videos_info = self._get_videos_info(mgid) - return videos_info @@ -94,12 +85,6 @@ class ToshIE(MTVServicesInfoExtractor): 'only_matching': True, }] - @classmethod - def _transform_rtmp_url(cls, rtmp_video_url): - new_urls = super(ToshIE, cls)._transform_rtmp_url(rtmp_video_url) - new_urls['rtmp'] = rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm') - return new_urls - class ComedyCentralTVIE(MTVServicesInfoExtractor): _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P[^/?#&]+)' diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 5250db212..00a980c7d 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -13,11 +13,11 @@ from ..utils import ( fix_xml_ampersands, float_or_none, HEADRequest, - NO_DEFAULT, RegexNotFoundError, sanitized_Request, strip_or_none, timeconvert, + try_get, unescapeHTML, update_url_query, url_basename, @@ -42,15 +42,6 @@ class MTVServicesInfoExtractor(InfoExtractor): # Remove the templates, like &device={device} return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url) - # This was originally implemented for ComedyCentral, but it also works here - @classmethod - def _transform_rtmp_url(cls, rtmp_video_url): - m = re.match(r'^rtmpe?://.*?/(?Pgsp\..+?/.*)$', rtmp_video_url) - if not m: - return {'rtmp': rtmp_video_url} - base = 'http://viacommtvstrmfs.fplive.net/' - return {'http': base + m.group('finalid')} - def _get_feed_url(self, uri): return self._FEED_URL @@ -91,22 +82,28 @@ 
class MTVServicesInfoExtractor(InfoExtractor): if rendition.get('method') == 'hls': hls_url = rendition.find('./src').text formats.extend(self._extract_m3u8_formats( - hls_url, video_id, ext='mp4', entry_protocol='m3u8_native')) + hls_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls')) else: # fms try: _, _, ext = rendition.attrib['type'].partition('/') rtmp_video_url = rendition.find('./src').text + if 'error_not_available.swf' in rtmp_video_url: + raise ExtractorError( + '%s said: video is not available' % self.IE_NAME, + expected=True) if rtmp_video_url.endswith('siteunavail.png'): continue - new_urls = self._transform_rtmp_url(rtmp_video_url) formats.extend([{ - 'ext': 'flv' if new_url.startswith('rtmp') else ext, - 'url': new_url, - 'format_id': '-'.join(filter(None, [kind, rendition.get('bitrate')])), + 'ext': 'flv' if rtmp_video_url.startswith('rtmp') else ext, + 'url': rtmp_video_url, + 'format_id': '-'.join(filter(None, [ + 'rtmp' if rtmp_video_url.startswith('rtmp') else None, + rendition.get('bitrate')])), 'width': int(rendition.get('width')), 'height': int(rendition.get('height')), - } for kind, new_url in new_urls.items()]) + }]) except (KeyError, TypeError): raise ExtractorError('Invalid rendition field.') self._sort_formats(formats) @@ -212,7 +209,28 @@ class MTVServicesInfoExtractor(InfoExtractor): [self._get_video_info(item, use_hls) for item in idoc.findall('.//item')], playlist_title=title, playlist_description=description) - def _extract_mgid(self, webpage, default=NO_DEFAULT): + def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None): + triforce_feed = self._parse_json(self._search_regex( + r'triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, + 'triforce feed', default='{}'), video_id, fatal=False) + + data_zone = self._search_regex( + r'data-zone=(["\'])(?P.+?_lc_promo.*?)\1', webpage, + 'data zone', default=data_zone, group='zone') + + feed_url = try_get( + triforce_feed, lambda x: x['manifest']['zones'][data_zone]['feed'], + compat_str) + if not feed_url: + return + + feed = self._download_json(feed_url, video_id, fatal=False) + if not feed: + return + + return try_get(feed, lambda x: x['result']['data']['id'], compat_str) + + def _extract_mgid(self, webpage): try: # the url can be http://media.mtvnservices.com/fb/{mgid}.swf # or http://media.mtvnservices.com/{mgid} @@ -232,7 +250,11 @@ class MTVServicesInfoExtractor(InfoExtractor): sm4_embed = self._html_search_meta( 'sm4:video:embed', webpage, 'sm4 embed', default='') mgid = self._search_regex( - r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=default) + r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None) + + if not mgid: + mgid = self._extract_triforce_mgid(webpage) + return mgid def _real_extract(self, url): diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index abfee3ece..c59896a17 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -46,7 +46,7 @@ class SpikeIE(MTVServicesInfoExtractor): _CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)') def _extract_mgid(self, webpage): - mgid = super(SpikeIE, self)._extract_mgid(webpage, default=None) + mgid = super(SpikeIE, self)._extract_mgid(webpage) if mgid is None: url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id') video_type, episode_id = url_parts.split('/', 1) From e54fc0524ebf7e3ec02fbd22f00fce466c952791 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 06:23:24 +0700 
Subject: [PATCH 0012/1696] [cmt] Add support for video-clips --- youtube_dl/extractor/cmt.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py index 6302b8d9c..f6b794fb3 100644 --- a/youtube_dl/extractor/cmt.py +++ b/youtube_dl/extractor/cmt.py @@ -5,7 +5,7 @@ from .mtv import MTVIE class CMTIE(MTVIE): IE_NAME = 'cmt.com' - _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes)/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes|video-clips)/(?P[^/]+)' _TESTS = [{ 'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', @@ -31,6 +31,12 @@ class CMTIE(MTVIE): }, { 'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172', 'only_matching': True, + }, { + 'url': 'http://www.cmt.com/full-episodes/537qb3/nashville-the-wayfaring-stranger-season-5-ep-501', + 'only_matching': True, + }, { + 'url': 'http://www.cmt.com/video-clips/t9e4ci/nashville-juliette-in-2-minutes', + 'only_matching': True, }] def _extract_mgid(self, webpage): From 4f66c16f337f3b2250d369b56bc31cfd7de06f89 Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Sat, 14 Jan 2017 00:26:11 +0100 Subject: [PATCH 0013/1696] [brightcove:legacy] Fix misplaced backslash in a regexp --- youtube_dl/extractor/brightcove.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index aa2923ccf..2e56d1df9 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -204,7 +204,7 @@ class BrightcoveLegacyIE(InfoExtractor): # // build Brightcove XML # } m = re.search( - r'''(?x)customBC.\createVideo\( + r'''(?x)customBC\.createVideo\( .*? 
# skipping width and height ["\'](?P\d+)["\']\s*,\s* # playerID ["\'](?PAQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters From 0b94510cd00d50ddda74ba0079f856650f24680e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 07:27:20 +0700 Subject: [PATCH 0014/1696] [ChangeLog] Actualize --- ChangeLog | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/ChangeLog b/ChangeLog index f1e234507..0106a7ae8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,24 @@ +version + +Core ++ [common] Add ability to customize akamai manifest host ++ [utils] Add more date formats + +Extractors +- [mtv] Eliminate _transform_rtmp_url +* [mtv] Generalize triforce mgid extraction ++ [cmt] Add support for full episodes and video clips (#11623) ++ [mitele] Extract DASH formats ++ [ooyala] Add support for videos with embedToken (#11684) +* [mixcloud] Fix extraction (#11674) +* [openload] Fix extraction (#10408) +* [tv4] Improve extraction (#11698) +* [freesound] Fix and improve extraction (#11602) ++ [nick] Add support for beta.nick.com (#11655) +* [mtv,cc] Use HLS by default with native HLS downloader (#11641) +* [mtv] Fix non-HLS extraction + + version 2017.01.10 Extractors From 5d4c7daa49b8ff83aa6fb13b183f47d4427c6513 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 07:31:07 +0700 Subject: [PATCH 0015/1696] release 2017.01.14 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 6a4c25680..a7bf2b90c 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.10** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.14** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.10 +[debug] youtube-dl version 2017.01.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 0106a7ae8..dba18d39b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.14 Core + [common] Add ability to customize akamai manifest host diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 214124722..17c6f9eb2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.10' +__version__ = '2017.01.14' From abe8cb763fd43ee2db09c73965f38db7db02559e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 08:30:00 +0700 Subject: [PATCH 0016/1696] [cbc] Improve playlist support (closes #11704) --- youtube_dl/extractor/cbc.py | 55 +++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 7c76ceac8..a291685bf 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -90,36 +90,49 @@ class CBCIE(InfoExtractor): }, }], 'skip': 'Geo-restricted to Canada', + }, { + # multiple CBC.APP.Caffeine.initInstance(...) 
+ 'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238', + 'info_dict': { + 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', + 'id': 'dog-indoor-exercise-winter-1.3928238', + }, + 'playlist_mincount': 6, }] @classmethod def suitable(cls, url): return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url) + def _extract_player_init(self, player_init, display_id): + player_info = self._parse_json(player_init, display_id, js_to_json) + media_id = player_info.get('mediaId') + if not media_id: + clip_id = player_info['clipId'] + feed = self._download_json( + 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id, + clip_id, fatal=False) + if feed: + media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str) + if not media_id: + media_id = self._download_json( + 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id, + clip_id)['entries'][0]['id'].split('/')[-1] + return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) + def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - player_init = self._search_regex( - r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init', - default=None) - if player_init: - player_info = self._parse_json(player_init, display_id, js_to_json) - media_id = player_info.get('mediaId') - if not media_id: - clip_id = player_info['clipId'] - feed = self._download_json( - 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id, - clip_id, fatal=False) - if feed: - media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str) - if not media_id: - media_id = self._download_json( - 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id, - clip_id)['entries'][0]['id'].split('/')[-1] - return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) - else: - entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r']+src="[^"]+?mediaId=(\d+)"', webpage)] - return self.playlist_result(entries) + entries = [ + self._extract_player_init(player_init, display_id) + for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)] + entries.extend([ + self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) + for media_id in re.findall(r']+src="[^"]+?mediaId=(\d+)"', webpage)]) + return self.playlist_result( + entries, display_id, + self._og_search_title(webpage, fatal=False), + self._og_search_description(webpage)) class CBCPlayerIE(InfoExtractor): From 8854f3fe782e48f4b145eacf58cca533a9f9b199 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 08:30:00 +0700 Subject: [PATCH 0017/1696] [README.md] Clarify newline format in cookies section (closes #11709) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 905c1b73f..a606346b2 100644 --- a/README.md +++ b/README.md @@ -841,7 +841,7 @@ Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. In order to extract cookies from browser use any conforming browser extension for exporting cookies. 
For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox). -Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. +Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, Mac OS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. Passing cookies to youtube-dl is a good way to workaround login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare). From 99d537a5e08499e20c3507c3f84048feacf77522 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 14 Jan 2017 07:12:31 +0100 Subject: [PATCH 0018/1696] [ooyala] fix typo --- youtube_dl/extractor/ooyala.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index f00cf745b..84be2b1e3 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -53,7 +53,7 @@ class OoyalaBaseIE(InfoExtractor): elif delivery_type == 'hds' or ext == 'f4m': formats.extend(self._extract_f4m_formats( s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) - elif delivery_type == 'hds' or ext == 'mpd': + elif delivery_type == 'dash' or ext == 'mpd': formats.extend(self._extract_mpd_formats( s_url, embed_code, mpd_id='dash', fatal=False)) elif delivery_type == 'smooth': From b80e2ebc8daa1ec30396cfa69836f1d96d23028f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Jan 2017 18:27:22 +0700 Subject: [PATCH 0019/1696] [dramafever] Add support for URLs with language code (#11714) --- youtube_dl/extractor/dramafever.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 1edd8e7bd..bcd9fe2a0 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -66,7 +66,7 @@ class DramaFeverBaseIE(AMPIE): class DramaFeverIE(DramaFeverBaseIE): IE_NAME = 'dramafever' - _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P[0-9]+/[0-9]+)(?:/|$)' + _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P[0-9]+/[0-9]+)(?:/|$)' _TESTS = [{ 'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/', 'info_dict': { @@ -103,6 +103,9 @@ class DramaFeverIE(DramaFeverBaseIE): # m3u8 download 'skip_download': True, }, + }, { + 'url': 
'https://www.dramafever.com/zh-cn/drama/4972/15/Doctor_Romantic/', + 'only_matching': True, }] def _real_extract(self, url): @@ -148,7 +151,7 @@ class DramaFeverIE(DramaFeverBaseIE): class DramaFeverSeriesIE(DramaFeverBaseIE): IE_NAME = 'dramafever:series' - _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$' + _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$' _TESTS = [{ 'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/', 'info_dict': { From 621a2800ca259399c0c010a1cbc2c56aee90228c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Jan 2017 04:42:05 +0700 Subject: [PATCH 0020/1696] [vevo] Improve geo restriction detection --- youtube_dl/extractor/vevo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index d82261e5e..f0a8075fb 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -206,7 +206,7 @@ class VevoIE(VevoBaseIE): note='Retrieving oauth token', errnote='Unable to retrieve oauth token') - if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage: + if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage): self.raise_geo_restricted( '%s said: This page is currently unavailable in your region' % self.IE_NAME) From cd55c6ccd7b9cd0c48d475330c40f382eb0bc625 Mon Sep 17 00:00:00 2001 From: sh!zeeg Date: Wed, 4 Jan 2017 01:51:08 +0300 Subject: [PATCH 0021/1696] [beam:live] Add extractor --- youtube_dl/extractor/beampro.py | 82 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 83 insertions(+) create mode 100644 youtube_dl/extractor/beampro.py diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py new file mode 100644 index 000000000..dc0a2b4af --- /dev/null +++ b/youtube_dl/extractor/beampro.py @@ -0,0 +1,82 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + clean_html, + compat_str, + int_or_none, + parse_iso8601, + try_get, +) + + +class BeamProLiveIE(InfoExtractor): + IE_NAME = 'Beam:live' + _VALID_URL = r'https?://(?:\w+.)?beam.pro/(?P[^?]+)$' + _API_CHANNEL = 'https://beam.pro/api/v1/channels/{0}' + _API_MANIFEST = 'https://beam.pro/api/v1/channels/{0}/manifest.m3u8' + _RATINGS = {'family': 0, 'teen': 13, '18+': 18} + + _TEST = { + 'url': 'http://www.beam.pro/niterhayven', + 'info_dict': { + 'id': '261562', + 'ext': 'mp4', + 'uploader': 'niterhayven', + 'timestamp': 1483477281, + 'age_limit': 18, + 'title': 'Introducing The Witcher 3 // The Grind Starts Now!', + 'thumbnail': r're:https://.*\.jpg$', + 'upload_date': '20170103', + 'uploader_id': 373396, + 'description': 'md5:0b161ac080f15fe05d18a07adb44a74d', + 'is_live': True, + }, + 'skip': 'niterhayven is offline', + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + channel_id = self._match_id(url) + chan_data = self._download_json(self._API_CHANNEL.format(channel_id), channel_id) + + if not chan_data.get('online'): + raise ExtractorError('{0} is offline'.format(channel_id), expected=True) + + formats = self._extract_m3u8_formats( + self._API_MANIFEST.format(chan_data.get('id')), channel_id, ext='mp4') + + self._sort_formats(formats) + info = {} + info['formats'] = formats + if chan_data: + info.update(self._extract_info(chan_data)) + if not info.get('title'): + info['title'] = 
self._live_title(channel_id) + if not info.get('id'): # barely possible but just in case + info['id'] = compat_str(abs(hash(channel_id)) % (10 ** 8)) + + return info + + def _extract_info(self, info): + thumbnail = try_get(info, lambda x: x['thumbnail']['url'], compat_str) + username = try_get(info, lambda x: x['user']['url'], compat_str) + video_id = compat_str(info['id']) if info.get('id') else None + rating = info.get('audience') + + return { + 'id': video_id, + 'title': info.get('name'), + 'description': clean_html(info.get('description')), + 'age_limit': self._RATINGS[rating] if rating in self._RATINGS else None, + 'is_live': True if info.get('online') else False, + 'timestamp': parse_iso8601(info.get('updatedAt')), + 'uploader': info.get('token') or username, + 'uploader_id': int_or_none(info.get('userId')), + 'view_count': int_or_none(info.get('viewersTotal')), + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5ba8efb0e..9d0610d21 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -88,6 +88,7 @@ from .bbc import ( BBCCoUkPlaylistIE, BBCIE, ) +from .beampro import BeamProLiveIE from .beeg import BeegIE from .behindkink import BehindKinkIE from .bellmedia import BellMediaIE From af62de104f33ebf8b473b3f7935451077fa56ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Jan 2017 06:07:35 +0700 Subject: [PATCH 0022/1696] [beam:live] Improve and simplify (#10702, closes #11596) --- youtube_dl/extractor/beampro.py | 71 ++++++++++++++------------------- 1 file changed, 31 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py index dc0a2b4af..f3a9e3278 100644 --- a/youtube_dl/extractor/beampro.py +++ b/youtube_dl/extractor/beampro.py @@ -14,25 +14,23 @@ from ..utils import ( class BeamProLiveIE(InfoExtractor): IE_NAME = 'Beam:live' - _VALID_URL = r'https?://(?:\w+.)?beam.pro/(?P[^?]+)$' - _API_CHANNEL = 'https://beam.pro/api/v1/channels/{0}' - _API_MANIFEST = 'https://beam.pro/api/v1/channels/{0}/manifest.m3u8' + _VALID_URL = r'https?://(?:\w+\.)?beam\.pro/(?P[^/?#&]+)' _RATINGS = {'family': 0, 'teen': 13, '18+': 18} - _TEST = { 'url': 'http://www.beam.pro/niterhayven', 'info_dict': { 'id': '261562', 'ext': 'mp4', - 'uploader': 'niterhayven', - 'timestamp': 1483477281, - 'age_limit': 18, 'title': 'Introducing The Witcher 3 // The Grind Starts Now!', + 'description': 'md5:0b161ac080f15fe05d18a07adb44a74d', 'thumbnail': r're:https://.*\.jpg$', + 'timestamp': 1483477281, 'upload_date': '20170103', - 'uploader_id': 373396, - 'description': 'md5:0b161ac080f15fe05d18a07adb44a74d', + 'uploader': 'niterhayven', + 'uploader_id': '373396', + 'age_limit': 18, 'is_live': True, + 'view_count': int, }, 'skip': 'niterhayven is offline', 'params': { @@ -41,42 +39,35 @@ class BeamProLiveIE(InfoExtractor): } def _real_extract(self, url): - channel_id = self._match_id(url) - chan_data = self._download_json(self._API_CHANNEL.format(channel_id), channel_id) + channel_name = self._match_id(url) - if not chan_data.get('online'): - raise ExtractorError('{0} is offline'.format(channel_id), expected=True) + chan = self._download_json( + 'https://beam.pro/api/v1/channels/%s' % channel_name, channel_name) - formats = self._extract_m3u8_formats( - self._API_MANIFEST.format(chan_data.get('id')), channel_id, ext='mp4') + if chan.get('online') is False: + raise ExtractorError( + '{0} is offline'.format(channel_name), expected=True) 
- self._sort_formats(formats) - info = {} - info['formats'] = formats - if chan_data: - info.update(self._extract_info(chan_data)) - if not info.get('title'): - info['title'] = self._live_title(channel_id) - if not info.get('id'): # barely possible but just in case - info['id'] = compat_str(abs(hash(channel_id)) % (10 ** 8)) + channel_id = chan['id'] - return info + formats = self._extract_m3u8_formats( + 'https://beam.pro/api/v1/channels/%s/manifest.m3u8' % channel_id, + channel_name, ext='mp4', m3u8_id='hls', fatal=False) + self._sort_formats(formats) - def _extract_info(self, info): - thumbnail = try_get(info, lambda x: x['thumbnail']['url'], compat_str) - username = try_get(info, lambda x: x['user']['url'], compat_str) - video_id = compat_str(info['id']) if info.get('id') else None - rating = info.get('audience') + user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id']) return { - 'id': video_id, - 'title': info.get('name'), - 'description': clean_html(info.get('description')), - 'age_limit': self._RATINGS[rating] if rating in self._RATINGS else None, - 'is_live': True if info.get('online') else False, - 'timestamp': parse_iso8601(info.get('updatedAt')), - 'uploader': info.get('token') or username, - 'uploader_id': int_or_none(info.get('userId')), - 'view_count': int_or_none(info.get('viewersTotal')), - 'thumbnail': thumbnail, + 'id': compat_str(chan.get('id') or channel_name), + 'title': self._live_title(chan.get('name') or channel_name), + 'description': clean_html(chan.get('description')), + 'thumbnail': try_get(chan, lambda x: x['thumbnail']['url'], compat_str), + 'timestamp': parse_iso8601(chan.get('updatedAt')), + 'uploader': chan.get('token') or try_get( + chan, lambda x: x['user']['username'], compat_str), + 'uploader_id': compat_str(user_id) if user_id else None, + 'age_limit': self._RATINGS.get(chan.get('audience')), + 'is_live': True, + 'view_count': int_or_none(chan.get('viewersTotal')), + 'formats': formats, } From 6f0be937473c5d5f60cd8e712287fcee844093d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Jan 2017 06:09:32 +0700 Subject: [PATCH 0023/1696] [YoutubeDL] Improve protocol auto determining (closes #11720) --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 5d654f55f..41d9a63ee 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1363,7 +1363,7 @@ class YoutubeDL(object): format['ext'] = determine_ext(format['url']).lower() # Automatically determine protocol if missing (useful for format # selection purposes) - if 'protocol' not in format: + if format.get('protocol') is None: format['protocol'] = determine_protocol(format) # Add HTTP headers, so that external programs can use them from the # json output From a7acf868a55b3d734bef564e3392020f18c20422 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 Jan 2017 10:34:39 +0700 Subject: [PATCH 0024/1696] [yourupload] Fix extraction (closes #11601) --- youtube_dl/extractor/yourupload.py | 49 +++++++++++++----------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/yourupload.py b/youtube_dl/extractor/yourupload.py index 4ce327845..9fa772838 100644 --- a/youtube_dl/extractor/yourupload.py +++ b/youtube_dl/extractor/yourupload.py @@ -2,44 +2,37 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import urljoin class YourUploadIE(InfoExtractor): - _VALID_URL = 
r'''(?x)https?://(?:www\.)? - (?:yourupload\.com/watch| - embed\.yourupload\.com| - embed\.yucache\.net - )/(?P[A-Za-z0-9]+) - ''' - _TESTS = [ - { - 'url': 'http://yourupload.com/watch/14i14h', - 'md5': '5e2c63385454c557f97c4c4131a393cd', - 'info_dict': { - 'id': '14i14h', - 'ext': 'mp4', - 'title': 'BigBuckBunny_320x180.mp4', - 'thumbnail': r're:^https?://.*\.jpe?g', - } - }, - { - 'url': 'http://embed.yourupload.com/14i14h', - 'only_matching': True, - }, - { - 'url': 'http://embed.yucache.net/14i14h?client_file_id=803349', - 'only_matching': True, - }, - ] + _VALID_URL = r'https?://(?:www\.)?(?:yourupload\.com/(?:watch|embed)|embed\.yourupload\.com)/(?P[A-Za-z0-9]+)' + _TESTS = [{ + 'url': 'http://yourupload.com/watch/14i14h', + 'md5': '5e2c63385454c557f97c4c4131a393cd', + 'info_dict': { + 'id': '14i14h', + 'ext': 'mp4', + 'title': 'BigBuckBunny_320x180.mp4', + 'thumbnail': r're:^https?://.*\.jpe?g', + } + }, { + 'url': 'http://www.yourupload.com/embed/14i14h', + 'only_matching': True, + }, { + 'url': 'http://embed.yourupload.com/14i14h', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - embed_url = 'http://embed.yucache.net/{0:}'.format(video_id) + embed_url = 'http://www.yourupload.com/embed/%s' % video_id + webpage = self._download_webpage(embed_url, video_id) title = self._og_search_title(webpage) - video_url = self._og_search_video_url(webpage) + video_url = urljoin(embed_url, self._og_search_video_url(webpage)) thumbnail = self._og_search_thumbnail(webpage, default=None) return { From 8e4988f1a21184839dcd23d7133c250a43c5ea58 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 15 Jan 2017 22:10:57 +0800 Subject: [PATCH 0025/1696] [niconico] Remove codes for downloading anonymously Apparently Niconico now blocks playing without an account Closes #11170 --- youtube_dl/extractor/niconico.py | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index a104e33f8..7e6c594c8 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -7,7 +7,6 @@ import datetime from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -40,6 +39,7 @@ class NiconicoIE(InfoExtractor): 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', 'duration': 33, }, + 'skip': 'Requires an account', }, { # File downloaded with and without credentials are different, so omit # the md5 field @@ -55,6 +55,7 @@ class NiconicoIE(InfoExtractor): 'timestamp': 1304065916, 'duration': 209, }, + 'skip': 'Requires an account', }, { # 'video exists but is marked as "deleted" # md5 is unstable @@ -65,9 +66,10 @@ class NiconicoIE(InfoExtractor): 'description': 'deleted', 'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>', 'upload_date': '20071224', - 'timestamp': 1198527840, # timestamp field has different value if logged in + 'timestamp': int, # timestamp field has different value if logged in 'duration': 304, }, + 'skip': 'Requires an account', }, { 'url': 'http://www.nicovideo.jp/watch/so22543406', 'info_dict': { @@ -79,7 +81,8 @@ class NiconicoIE(InfoExtractor): 'upload_date': '20140104', 'uploader': 'アニメロチャンネル', 'uploader_id': '312', - } + }, + 'skip': 'The viewing period of the video you were searching for has expired.', }] _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P(?:[a-z]{2})?[0-9]+)' @@ -134,23 +137,7 @@ class 
NiconicoIE(InfoExtractor): 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', video_id, 'Downloading flv info') else: - # Get external player info - ext_player_info = self._download_webpage( - 'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id) - thumb_play_key = self._search_regex( - r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey') - - # Get flv info - flv_info_data = compat_urllib_parse_urlencode({ - 'k': thumb_play_key, - 'v': video_id - }) - flv_info_request = sanitized_Request( - 'http://ext.nicovideo.jp/thumb_watch', flv_info_data, - {'Content-Type': 'application/x-www-form-urlencoded'}) - flv_info_webpage = self._download_webpage( - flv_info_request, video_id, - note='Downloading flv info', errnote='Unable to download flv info') + raise ExtractorError('Niconico videos now require logging in', expected=True) flv_info = compat_urlparse.parse_qs(flv_info_webpage) if 'url' not in flv_info: From dcae7b3fdc6e6812e78c8dba96d671ccf0ab068e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 15 Jan 2017 22:51:54 +0800 Subject: [PATCH 0026/1696] [niconico] Allow login via cookies Some codes are borrowed from #7968, which is by @jlhg Closes #7968 --- ChangeLog | 5 +++++ youtube_dl/extractor/niconico.py | 18 +++++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index dba18d39b..029d13426 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +version + +Extractors ++ [niconico] Support login via cookies (#7968) + version 2017.01.14 Core diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 7e6c594c8..8baac23e4 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -87,8 +87,6 @@ class NiconicoIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P(?:[a-z]{2})?[0-9]+)' _NETRC_MACHINE = 'niconico' - # Determine whether the downloader used authentication to download video - _AUTHENTICATED = False def _real_initialize(self): self._login() @@ -112,8 +110,6 @@ class NiconicoIE(InfoExtractor): if re.search(r'(?i)

<h1 class="mb8p4">Log in error</h1>
', login_results) is not None: self._downloader.report_warning('unable to log in: bad username or password') return False - # Successful login - self._AUTHENTICATED = True return True def _real_extract(self, url): @@ -131,19 +127,19 @@ class NiconicoIE(InfoExtractor): 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, note='Downloading video info page') - if self._AUTHENTICATED: - # Get flv info - flv_info_webpage = self._download_webpage( - 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', - video_id, 'Downloading flv info') - else: - raise ExtractorError('Niconico videos now require logging in', expected=True) + # Get flv info + flv_info_webpage = self._download_webpage( + 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', + video_id, 'Downloading flv info') flv_info = compat_urlparse.parse_qs(flv_info_webpage) if 'url' not in flv_info: if 'deleted' in flv_info: raise ExtractorError('The video has been deleted.', expected=True) + elif 'closed' in flv_info: + raise ExtractorError('Niconico videos now require logging in', + expected=True) else: raise ExtractorError('Unable to find video URL') From 16e2c8f7710bffb462921dbc93adfa6274bd9334 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 16 Jan 2017 00:06:52 +0800 Subject: [PATCH 0027/1696] [brightcove] Recognize another player ID Closes #11688 --- ChangeLog | 1 + youtube_dl/extractor/brightcove.py | 2 +- youtube_dl/extractor/generic.py | 20 ++++++++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 029d13426..2e0ddd4f6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [brightcove] Recognize another player ID pattern (#11688) + [niconico] Support login via cookies (#7968) version 2017.01.14 diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 2e56d1df9..5c6e99da1 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -179,7 +179,7 @@ class BrightcoveLegacyIE(InfoExtractor): params = {} - playerID = find_param('playerID') + playerID = find_param('playerID') or find_param('playerId') if playerID is None: raise ExtractorError('Cannot find player ID') params['playerID'] = playerID diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ac29ec600..a3ac7d26b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -422,6 +422,26 @@ class GenericIE(InfoExtractor): 'skip_download': True, # m3u8 download }, }, + { + # Brightcove with alternative playerID key + 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html', + 'info_dict': { + 'id': 'nmeth.2062_SV1', + 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research', + }, + 'playlist': [{ + 'info_dict': { + 'id': '2228375078001', + 'ext': 'mp4', + 'title': 'nmeth.2062-sv1', + 'description': 'nmeth.2062-sv1', + 'timestamp': 1363357591, + 'upload_date': '20130315', + 'uploader': 'Nature Publishing Group', + 'uploader_id': '1964492299001', + }, + }], + }, # ooyala video { 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', From 906420cae37ee3c2f48d23c3a4fa0543a66947d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 21:54:47 +0700 Subject: [PATCH 
0028/1696] [limelight] Improve and make more robust (closes #11737) + Add support for direct http for videos hosted on video.llnw.net * Check handmade http URLs --- youtube_dl/extractor/limelight.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 905a0e85f..e635f3c4d 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -59,14 +59,26 @@ class LimelightBaseIE(InfoExtractor): format_id = 'rtmp' if stream.get('videoBitRate'): format_id += '-%d' % int_or_none(stream['videoBitRate']) - http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:] - urls.append(http_url) - http_fmt = fmt.copy() - http_fmt.update({ - 'url': http_url, - 'format_id': format_id.replace('rtmp', 'http'), - }) - formats.append(http_fmt) + http_format_id = format_id.replace('rtmp', 'http') + + CDN_HOSTS = ( + ('delvenetworks.com', 'cpl.delvenetworks.com'), + ('video.llnw.net', 's2.content.video.llnw.net'), + ) + for cdn_host, http_host in CDN_HOSTS: + if cdn_host not in rtmp.group('host').lower(): + continue + http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:]) + urls.append(http_url) + if self._is_valid_url(http_url, video_id, http_format_id): + http_fmt = fmt.copy() + http_fmt.update({ + 'url': http_url, + 'format_id': http_format_id, + }) + formats.append(http_fmt) + break + fmt.update({ 'url': rtmp.group('url'), 'play_path': rtmp.group('playpath'), From 0ce8c66fb05fefbe51ac1eca8d3ddbd561b38a54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 22:07:12 +0700 Subject: [PATCH 0029/1696] [options] Include custom conf in final argv (closes #11741) --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 0eb4924b6..0b8c1671d 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -867,7 +867,7 @@ def parseOpts(overrideArguments=None): if '--ignore-config' not in system_conf: user_conf = _readUserConf() - argv = system_conf + user_conf + command_line_conf + argv = system_conf + user_conf + custom_conf + command_line_conf opts, args = parser.parse_args(argv) if opts.verbose: for conf_label, conf in ( From 79fc8496c6ab423d591f9ed1a41358d038242bbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 23:31:50 +0700 Subject: [PATCH 0030/1696] [xiami] Improve extraction (closes #11699) * Relax _VALID_URLs * Improve track metadata extraction --- youtube_dl/extractor/xiami.py | 53 +++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py index 86abef257..d017e03de 100644 --- a/youtube_dl/extractor/xiami.py +++ b/youtube_dl/extractor/xiami.py @@ -16,7 +16,9 @@ class XiamiBaseIE(InfoExtractor): return webpage def _extract_track(self, track, track_id=None): - title = track['title'] + track_name = track.get('songName') or track.get('name') or track['subName'] + artist = track.get('artist') or track.get('artist_name') or track.get('singers') + title = '%s - %s' % (artist, track_name) if artist else track_name track_url = self._decrypt(track['location']) subtitles = {} @@ -31,9 +33,10 @@ class XiamiBaseIE(InfoExtractor): 'thumbnail': track.get('pic') or track.get('album_pic'), 'duration': int_or_none(track.get('length')), 'creator': track.get('artist', '').split(';')[0], - 'track': title, - 
'album': track.get('album_name'), - 'artist': track.get('artist'), + 'track': track_name, + 'track_number': int_or_none(track.get('track')), + 'album': track.get('album_name') or track.get('title'), + 'artist': artist, 'subtitles': subtitles, } @@ -68,14 +71,14 @@ class XiamiBaseIE(InfoExtractor): class XiamiSongIE(XiamiBaseIE): IE_NAME = 'xiami:song' IE_DESC = '虾米音乐' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.xiami.com/song/1775610518', 'md5': '521dd6bea40fd5c9c69f913c232cb57e', 'info_dict': { 'id': '1775610518', 'ext': 'mp3', - 'title': 'Woman', + 'title': 'HONNE - Woman', 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', 'duration': 265, 'creator': 'HONNE', @@ -95,7 +98,7 @@ class XiamiSongIE(XiamiBaseIE): 'info_dict': { 'id': '1775256504', 'ext': 'mp3', - 'title': '悟空', + 'title': '戴荃 - 悟空', 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', 'duration': 200, 'creator': '戴荃', @@ -109,6 +112,26 @@ class XiamiSongIE(XiamiBaseIE): }, }, 'skip': 'Georestricted', + }, { + 'url': 'http://www.xiami.com/song/1775953850', + 'info_dict': { + 'id': '1775953850', + 'ext': 'mp3', + 'title': 'До Скону - Чума Пожирает Землю', + 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', + 'duration': 683, + 'creator': 'До Скону', + 'track': 'Чума Пожирает Землю', + 'track_number': 7, + 'album': 'Ад', + 'artist': 'До Скону', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://www.xiami.com/song/xLHGwgd07a1', + 'only_matching': True, }] def _real_extract(self, url): @@ -124,7 +147,7 @@ class XiamiPlaylistBaseIE(XiamiBaseIE): class XiamiAlbumIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:album' IE_DESC = '虾米音乐 - 专辑' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P[^/?#&]+)' _TYPE = '1' _TESTS = [{ 'url': 'http://www.xiami.com/album/2100300444', @@ -136,28 +159,34 @@ class XiamiAlbumIE(XiamiPlaylistBaseIE): }, { 'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9', 'only_matching': True, + }, { + 'url': 'http://www.xiami.com/album/URVDji2a506', + 'only_matching': True, }] class XiamiArtistIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:artist' IE_DESC = '虾米音乐 - 歌手' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P[^/?#&]+)' _TYPE = '2' - _TEST = { + _TESTS = [{ 'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp', 'info_dict': { 'id': '2132', }, 'playlist_count': 20, 'skip': 'Georestricted', - } + }, { + 'url': 'http://www.xiami.com/artist/bC5Tk2K6eb99', + 'only_matching': True, + }] class XiamiCollectionIE(XiamiPlaylistBaseIE): IE_NAME = 'xiami:collection' IE_DESC = '虾米音乐 - 精选集' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P[^/?#&]+)' _TYPE = '3' _TEST = { 'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr', From ddd53c392e0b3d3d2c62ba28117a9b07702c5bd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 23:42:04 +0700 Subject: [PATCH 0031/1696] [ChangeLog] Actualize --- ChangeLog | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ChangeLog b/ChangeLog index 2e0ddd4f6..ee59e120c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,22 @@ version +Core +* [options] Apply custom config to final composite configuration 
(#11741) +* [YoutubeDL] Improve protocol auto determining (#11720) + Extractors +* [xiami] Relax URL regular expressions +* [xiami] Improve track metadata extraction (#11699) ++ [limelight] Check hand-make direct HTTP links ++ [limelight] Add support for direct HTTP links at video.llnw.net (#11737) + [brightcove] Recognize another player ID pattern (#11688) + [niconico] Support login via cookies (#7968) +* [yourupload] Fix extraction (#11601) ++ [beam:live] Add support for beam.pro live streams (#10702, #11596) +* [vevo] Improve geo restriction detection ++ [dramafever] Add support for URLs with language code (#11714) +* [cbc] Improve playlist support (#11704) + version 2017.01.14 From c1c2fe2045911c310fd5d2eda7bbb53ad581d250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 16 Jan 2017 23:44:04 +0700 Subject: [PATCH 0032/1696] release 2017.01.16 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a7bf2b90c..c04f6246a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.14** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.14 +[debug] youtube-dl version 2017.01.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index ee59e120c..f6d73f982 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.16 Core * [options] Apply custom config to final composite configuration (#11741) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 0f6c4ec0c..a3c76d5db 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -86,6 +86,7 @@ - **bbc.co.uk:article**: BBC articles - **bbc.co.uk:iplayer:playlist** - **bbc.co.uk:playlist** + - **Beam:live** - **Beatport** - **Beeg** - **BehindKink** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 17c6f9eb2..c20718dd6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.14' +__version__ = '2017.01.16' From c0bd51c090d617811f5e405294dce06f5871d717 Mon Sep 17 00:00:00 2001 From: Kagami Hiiragi Date: Mon, 16 Jan 2017 22:19:52 +0300 Subject: [PATCH 0033/1696] [naver] Support tv.naver.com links --- youtube_dl/extractor/naver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 055070ff5..aba0a9a70 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -12,7 +12,7 @@ from ..utils import ( class NaverIE(InfoExtractor): - _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P\d+)' + _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P\d+)' _TESTS = [{ 'url': 'http://tvcast.naver.com/v/81652', From 8a5f0a6357746d293f7330e40a3cf5823b1b626d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 17 Jan 2017 21:19:57 +0700 Subject: [PATCH 0034/1696] [naver] Update tests for #11743 --- youtube_dl/extractor/naver.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index aba0a9a70..e8131333f 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -15,7 +15,7 @@ class NaverIE(InfoExtractor): _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P\d+)' _TESTS = [{ - 'url': 'http://tvcast.naver.com/v/81652', + 'url': 'http://tv.naver.com/v/81652', 'info_dict': { 'id': '81652', 'ext': 'mp4', @@ -24,7 +24,7 @@ class NaverIE(InfoExtractor): 'upload_date': '20130903', }, }, { - 'url': 'http://tvcast.naver.com/v/395837', + 'url': 'http://tv.naver.com/v/395837', 'md5': '638ed4c12012c458fefcddfd01f173cd', 'info_dict': { 'id': '395837', @@ -34,6 +34,9 @@ class NaverIE(InfoExtractor): 'upload_date': '20150519', }, 'skip': 'Georestricted', + }, { + 'url': 'http://tvcast.naver.com/v/81652', + 'only_matching': True, }] def _real_extract(self, url): From 
136078966b2047b21e9784060cebdc893c643ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 17 Jan 2017 23:14:07 +0700 Subject: [PATCH 0035/1696] [imdb] Extend _VALID_URL (closes #11744) --- youtube_dl/extractor/imdb.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index f0fc8d49a..f95c00c73 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -13,7 +13,7 @@ from ..utils import ( class ImdbIE(InfoExtractor): IE_NAME = 'imdb' IE_DESC = 'Internet Movie Database trailers' - _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-)vi(?P\d+)' + _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-|videoplayer/)vi(?P\d+)' _TESTS = [{ 'url': 'http://www.imdb.com/video/imdb/vi2524815897', @@ -32,6 +32,9 @@ class ImdbIE(InfoExtractor): }, { 'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897', 'only_matching': True, + }, { + 'url': 'http://www.imdb.com/videoplayer/vi1562949145', + 'only_matching': True, }] def _real_extract(self, url): From 4e44598547b02d42aa628506245c40c3d633814e Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Mon, 9 Jan 2017 21:19:55 +0100 Subject: [PATCH 0036/1696] [20min] Fix extraction --- youtube_dl/extractor/twentymin.py | 37 ++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py index b721ecb0a..68d5a0cb5 100644 --- a/youtube_dl/extractor/twentymin.py +++ b/youtube_dl/extractor/twentymin.py @@ -13,10 +13,10 @@ class TwentyMinutenIE(InfoExtractor): _TESTS = [{ # regular video 'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2', - 'md5': 'b52d6bc6ea6398e6a38f12cfd418149c', + 'md5': 'e7264320db31eed8c38364150c12496e', 'info_dict': { 'id': '469148', - 'ext': 'flv', + 'ext': 'mp4', 'title': '85 000 Franken für 15 perfekte Minuten', 'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)', 'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg' @@ -34,17 +34,29 @@ class TwentyMinutenIE(InfoExtractor): 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' }, 'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.', + }, { + # news article with video + 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', + 'md5': '372917ba85ed969e176d287ae54b2f94', + 'info_dict': { + 'id': '523629', + 'display_id': 'So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', + 'ext': 'mp4', + 'title': 'So kommen Sie bei Eis und Schnee sicher an', + 'description': 'Schneegestöber und Glatteis führten in den letzten Tagen zu zahlreichen Strassenunfällen. 
Ein Experte erklärt, worauf man nun beim Autofahren achten muss.', + 'thumbnail': 'http://www.20min.ch/images/content/2/7/0/27032552/83/teaserbreit.jpg', + } }, { # YouTube embed 'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184', - 'md5': 'cec64d59aa01c0ed9dbba9cf639dd82f', + 'md5': 'e7e237fd98da2a3cc1422ce683df234d', 'info_dict': { 'id': 'ivM7A7SpDOs', 'ext': 'mp4', 'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016', 'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a', 'upload_date': '20160424', - 'uploader': 'RTVCM Castilla-La Mancha', + 'uploader': 'CMM Castilla-La Mancha Media', 'uploader_id': 'RTVCM', }, 'add_ie': ['Youtube'], @@ -77,18 +89,31 @@ class TwentyMinutenIE(InfoExtractor): r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News') if not video_id: + params = self._html_search_regex( + r']+src="(?:https?:)?//www\.20min\.ch/videoplayer/videoplayer\.html\?params=(.+?[^"])"', + webpage, '20min embed URL') video_id = self._search_regex( - r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id') + r'.*videoId@(\d+)', + params, 'Video Id') description = self._html_search_meta( 'description', webpage, 'description') thumbnail = self._og_search_thumbnail(webpage) + formats = [] + format_preferences = [('sd', ''), ('hd', 'h')] + for format_id, url_extension in format_preferences: + format_url = 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, url_extension) + formats.append({ + 'format_id': format_id, + 'url': format_url, + }) + return { 'id': video_id, 'display_id': display_id, - 'url': 'http://speed.20min-tv.ch/%sm.flv' % video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, + 'formats': formats, } From 538b17a09c6546d58babc5eb4a3abc08dcff2d89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 22:05:11 +0700 Subject: [PATCH 0037/1696] [20min] Improve --- youtube_dl/extractor/twentymin.py | 122 ++++++++++++------------------ 1 file changed, 47 insertions(+), 75 deletions(-) diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py index 68d5a0cb5..4fd1aa4bf 100644 --- a/youtube_dl/extractor/twentymin.py +++ b/youtube_dl/extractor/twentymin.py @@ -4,116 +4,88 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import remove_end +from ..utils import ( + int_or_none, + try_get, +) class TwentyMinutenIE(InfoExtractor): IE_NAME = '20min' - _VALID_URL = r'https?://(?:www\.)?20min\.ch/(?:videotv/*\?.*\bvid=(?P\d+)|(?:[^/]+/)*(?P[^/#?]+))' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?20min\.ch/ + (?: + videotv/*\?.*?\bvid=| + videoplayer/videoplayer\.html\?.*?\bvideoId@ + ) + (?P\d+) + ''' _TESTS = [{ - # regular video 'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2', 'md5': 'e7264320db31eed8c38364150c12496e', 'info_dict': { 'id': '469148', 'ext': 'mp4', 'title': '85 000 Franken für 15 perfekte Minuten', - 'description': 'Was die Besucher vom Silvesterzauber erwarten können. 
(Video: Alice Grosjean/Murat Temel)', - 'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg' - } - }, { - # news article with video - 'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469', - 'md5': 'cd4cbb99b94130cff423e967cd275e5e', - 'info_dict': { - 'id': '469408', - 'display_id': '-Wir-muessen-mutig-nach-vorne-schauen--22050469', - 'ext': 'flv', - 'title': '«Wir müssen mutig nach vorne schauen»', - 'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.', - 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' + 'thumbnail': r're:https?://.*\.jpg$', }, - 'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.', }, { - # news article with video - 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', - 'md5': '372917ba85ed969e176d287ae54b2f94', + 'url': 'http://www.20min.ch/videoplayer/videoplayer.html?params=client@twentyDE|videoId@523629', 'info_dict': { 'id': '523629', - 'display_id': 'So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', 'ext': 'mp4', 'title': 'So kommen Sie bei Eis und Schnee sicher an', - 'description': 'Schneegestöber und Glatteis führten in den letzten Tagen zu zahlreichen Strassenunfällen. Ein Experte erklärt, worauf man nun beim Autofahren achten muss.', - 'thumbnail': 'http://www.20min.ch/images/content/2/7/0/27032552/83/teaserbreit.jpg', - } - }, { - # YouTube embed - 'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184', - 'md5': 'e7e237fd98da2a3cc1422ce683df234d', - 'info_dict': { - 'id': 'ivM7A7SpDOs', - 'ext': 'mp4', - 'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016', - 'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a', - 'upload_date': '20160424', - 'uploader': 'CMM Castilla-La Mancha Media', - 'uploader_id': 'RTVCM', + 'description': 'md5:117c212f64b25e3d95747e5276863f7d', + 'thumbnail': r're:https?://.*\.jpg$', + }, + 'params': { + 'skip_download': True, }, - 'add_ie': ['Youtube'], }, { 'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738', 'only_matching': True, - }, { - 'url': 'http://www.20min.ch/ro/sortir/cinema/story/Grandir-au-bahut--c-est-dur-18927411', - 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return [m.group('url') for m in re.finditer( + r']+src=(["\'])(?P(?:https?://)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1', + webpage)] + def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id + video_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + video = self._download_json( + 'http://api.20min.ch/video/%s/show' % video_id, + video_id)['content'] - youtube_url = self._html_search_regex( - r']+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"', - webpage, 'YouTube embed URL', default=None) - if youtube_url is not None: - return self.url_result(youtube_url, 'Youtube') + title = video['title'] - title = self._html_search_regex( - r'

<h1>.*?<span>(.+?)</span></h1>
', - webpage, 'title', default=None) - if not title: - title = remove_end(re.sub( - r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News') + formats = [{ + 'format_id': format_id, + 'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p), + 'quality': quality, + } for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])] + self._sort_formats(formats) - if not video_id: - params = self._html_search_regex( - r']+src="(?:https?:)?//www\.20min\.ch/videoplayer/videoplayer\.html\?params=(.+?[^"])"', - webpage, '20min embed URL') - video_id = self._search_regex( - r'.*videoId@(\d+)', - params, 'Video Id') + description = video.get('lead') + thumbnail = video.get('thumbnail') - description = self._html_search_meta( - 'description', webpage, 'description') - thumbnail = self._og_search_thumbnail(webpage) + def extract_count(kind): + return try_get( + video, + lambda x: int_or_none(x['communityobject']['thumbs_%s' % kind])) - formats = [] - format_preferences = [('sd', ''), ('hd', 'h')] - for format_id, url_extension in format_preferences: - format_url = 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, url_extension) - formats.append({ - 'format_id': format_id, - 'url': format_url, - }) + like_count = extract_count('up') + dislike_count = extract_count('down') return { 'id': video_id, - 'display_id': display_id, 'title': title, 'description': description, 'thumbnail': thumbnail, + 'like_count': like_count, + 'dislike_count': dislike_count, 'formats': formats, } From b687c85eab942553e925256ad10de693227ba553 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 22:08:31 +0700 Subject: [PATCH 0038/1696] [extractor/generic] Add support for 20 minuten embeds (closes #11683, closes #11751) --- youtube_dl/extractor/generic.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a3ac7d26b..154545df7 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -78,6 +78,7 @@ from .vbox7 import Vbox7IE from .dbtv import DBTVIE from .piksel import PikselIE from .videa import VideaIE +from .twentymin import TwentyMinutenIE class GenericIE(InfoExtractor): @@ -1468,6 +1469,20 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 2, }, + { + # 20 minuten embed + 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', + 'info_dict': { + 'id': '523629', + 'ext': 'mp4', + 'title': 'So kommen Sie bei Eis und Schnee sicher an', + 'description': 'md5:117c212f64b25e3d95747e5276863f7d', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [TwentyMinutenIE.ie_key()], + } # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2421,6 +2436,12 @@ class GenericIE(InfoExtractor): if videa_urls: return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key()) + # Look for 20 minuten embeds + twentymin_urls = TwentyMinutenIE._extract_urls(webpage) + if twentymin_urls: + return _playlist_from_matches( + twentymin_urls, ie=TwentyMinutenIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') From aaf2b7c57a3d2dc9ba12f1aa401cba088e114916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 22:20:11 +0700 Subject: [PATCH 0039/1696] [canalplus] Add fallback for video id (closes #11764) --- youtube_dl/extractor/canalplus.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 10cf165bc..b3f76a7b1 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -107,7 +107,7 @@ class CanalplusIE(InfoExtractor): [r']+?videoId=(["\'])(?P\d+)', r'id=["\']canal_video_player(?P\d+)', r'data-video=["\'](?P\d+)'], - webpage, 'video id', group='id') + webpage, 'video id', default=mobj.group('vid'), group='id') info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id) video_data = self._download_json(info_url, video_id, 'Downloading video JSON') From baa3e1845b26d9756642325bbb0d58e22025b2ec Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 18 Jan 2017 17:00:15 +0100 Subject: [PATCH 0040/1696] [bilibili] fix extraction(closes #11077) --- youtube_dl/extractor/bilibili.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 5051934ef..85ea5e6ee 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -34,8 +34,8 @@ class BiliBiliIE(InfoExtractor): }, } - _APP_KEY = '6f90a59ac58a4123' - _BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326' + _APP_KEY = '84956560bc028eb7' + _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e' def _real_extract(self, url): video_id = self._match_id(url) From 460f61fac42592eb273b7d58efc314cc83687b8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 23:06:46 +0700 Subject: [PATCH 0041/1696] [ChangeLog] Actualize --- ChangeLog | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ChangeLog b/ChangeLog index f6d73f982..994895edc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +version + +Extractors +* [bilibili] Fix extraction (#11077) ++ [canalplus] Add fallback for video id (#11764) +* [20min] Fix extraction (#11683, #11751) +* [imdb] Extend URL regular expression (#11744) ++ [naver] Add support for tv.naver.com links (#11743) + + version 2017.01.16 Core From 1560baacc677c43c1007acfc89b8190f81a59684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 23:10:00 +0700 Subject: [PATCH 0042/1696] release 2017.01.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index c04f6246a..38cb13a33 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.16 +[debug] youtube-dl version 2017.01.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 994895edc..5aa4e3c6b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.18 Extractors * [bilibili] Fix extraction (#11077) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c20718dd6..669f60f65 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.16' +__version__ = '2017.01.18' From f1e70fc2ff6f1536873ed73ffc9bff63653fd5ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 18 Jan 2017 23:34:11 +0700 Subject: [PATCH 0043/1696] [mtv] Relax triforce feed regex (closes #11766) --- youtube_dl/extractor/mtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 00a980c7d..e48ea2481 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -211,7 +211,7 @@ class MTVServicesInfoExtractor(InfoExtractor): def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None): triforce_feed = self._parse_json(self._search_regex( - r'triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, + r'triforceManifestFeed\s*=\s*({.+?})\s*;\s*\n', webpage, 'triforce feed', default='{}'), video_id, fatal=False) data_zone = self._search_regex( From eb3f008c9e686f38c50511004d5c9a51b2e8cdd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 19 Jan 2017 04:49:31 +0700 Subject: [PATCH 0044/1696] [uol] Fix extraction (closes #11770) --- youtube_dl/extractor/uol.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/uol.py b/youtube_dl/extractor/uol.py index c27c64387..e67083004 100644 --- a/youtube_dl/extractor/uol.py +++ b/youtube_dl/extractor/uol.py @@ -84,12 +84,27 @@ class UOLIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - if not video_id.isdigit(): - embed_page = self._download_webpage('https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id, video_id) - video_id = self._search_regex(r'mediaId=(\d+)', embed_page, 'media id') + media_id = None + + if video_id.isdigit(): + media_id = video_id + + if not media_id: + embed_page = self._download_webpage( + 'https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id, + video_id, 'Downloading embed page', fatal=False) + if embed_page: + media_id = self._search_regex( + (r'uol\.com\.br/(\d+)', r'mediaId=(\d+)'), + embed_page, 'media id', default=None) + + if not media_id: + webpage = self._download_webpage(url, video_id) + media_id = self._search_regex(r'mediaId=(\d+)', webpage, 'media id') + video_data = self._download_json( - 
'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % video_id, - video_id)['item'] + 'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % media_id, + media_id)['item'] title = video_data['title'] query = { @@ -118,7 +133,7 @@ class UOLIE(InfoExtractor): tags.append(tag_description) return { - 'id': video_id, + 'id': media_id, 'title': title, 'description': clean_html(video_data.get('desMedia')), 'thumbnail': video_data.get('thumbnail'), From cccd70a2752ad079ed560e42ff085adcabebaac2 Mon Sep 17 00:00:00 2001 From: james mike dupont Date: Thu, 19 Jan 2017 04:18:13 -0500 Subject: [PATCH 0045/1696] untie --- youtube_dl/extractor/flipagram.py | 2 +- youtube_dl/extractor/vimeo.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/flipagram.py b/youtube_dl/extractor/flipagram.py index 1902a2393..b7be40f1b 100644 --- a/youtube_dl/extractor/flipagram.py +++ b/youtube_dl/extractor/flipagram.py @@ -81,7 +81,7 @@ class FlipagramIE(InfoExtractor): 'filesize': int_or_none(cover.get('size')), } for cover in flipagram.get('covers', []) if cover.get('url')] - # Note that this only retrieves comments that are initally loaded. + # Note that this only retrieves comments that are initially loaded. # For videos with large amounts of comments, most won't be retrieved. comments = [] for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []): diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 2e98b0e6f..add753635 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -338,7 +338,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'expected_warnings': ['Unable to download JSON metadata'], }, { - # redirects to ondemand extractor and should be passed throught it + # redirects to ondemand extractor and should be passed through it # for successful extraction 'url': 'https://vimeo.com/73445910', 'info_dict': { From 1fe84be0f3b36822af804db6cf7c06a1ac5ac688 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 Jan 2017 00:47:04 +0700 Subject: [PATCH 0046/1696] [1tv] Add support for hls (closes #11786) --- youtube_dl/extractor/firsttv.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index c6fb67057..081c71842 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -86,18 +86,43 @@ class FirstTVIE(InfoExtractor): title = item['title'] quality = qualities(QUALITIES) formats = [] + path = None for f in item.get('mbr', []): src = f.get('src') if not src or not isinstance(src, compat_str): continue tbr = int_or_none(self._search_regex( r'_(\d{3,})\.mp4', src, 'tbr', default=None)) + if not path: + path = self._search_regex( + r'//[^/]+/(.+?)_\d+\.mp4', src, + 'm3u8 path', default=None) formats.append({ 'url': src, 'format_id': f.get('name'), 'tbr': tbr, - 'quality': quality(f.get('name')), + 'source_preference': quality(f.get('name')), }) + # m3u8 URL format is reverse engineered from [1] (search for + # master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru) + # is taken from [2]. + # 1. http://static.1tv.ru/player/eump1tv-current/eump-1tv.all.min.js?rnd=9097422834:formatted + # 2. 
http://static.1tv.ru/player/eump1tv-config/config-main.js?rnd=9097422834 + if not path and len(formats) == 1: + path = self._search_regex( + r'//[^/]+/(.+?$)', formats[0]['url'], + 'm3u8 path', default=None) + if path: + if len(formats) == 1: + m3u8_path = ',' + else: + tbrs = [compat_str(t) for t in sorted(f['tbr'] for f in formats)] + m3u8_path = '_,%s,%s' % (','.join(tbrs), '.mp4') + formats.extend(self._extract_m3u8_formats( + 'http://balancer-vod.1tv.ru/%s%s.urlset/master.m3u8' + % (path, m3u8_path), + display_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) self._sort_formats(formats) thumbnail = item.get('poster') or self._og_search_thumbnail(webpage) From d77ac737900eede5e1508b9822e71c8595fe0879 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 21:59:24 +0800 Subject: [PATCH 0047/1696] [ustream] Add UstreamIE._extract_url() Ref: #11547 --- youtube_dl/extractor/generic.py | 8 ++++---- youtube_dl/extractor/ustream.py | 7 +++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 154545df7..a7c104845 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -79,6 +79,7 @@ from .dbtv import DBTVIE from .piksel import PikselIE from .videa import VideaIE from .twentymin import TwentyMinutenIE +from .ustream import UstreamIE class GenericIE(InfoExtractor): @@ -2112,10 +2113,9 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url'), 'TED') # Look for embedded Ustream videos - mobj = re.search( - r']+?src=(["\'])(?Phttp://www\.ustream\.tv/embed/.+?)\1', webpage) - if mobj is not None: - return self.url_result(mobj.group('url'), 'Ustream') + ustream_url = UstreamIE._extract_url(webpage) + if ustream_url: + return self.url_result(ustream_url, UstreamIE.ie_key()) # Look for embedded arte.tv player mobj = re.search( diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 0c06bf36b..5737d4d16 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -69,6 +69,13 @@ class UstreamIE(InfoExtractor): }, }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r']+?src=(["\'])(?Phttp://www\.ustream\.tv/embed/.+?)\1', webpage) + if mobj is not None: + return mobj.group('url') + def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None): def num_to_hex(n): return hex(n)[2:] From 4447fb23320b9214ab3188717794d00b18887617 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 22:11:43 +0800 Subject: [PATCH 0048/1696] [cspan] Support Ustream embedded videos Closes #11547 --- ChangeLog | 6 ++++++ youtube_dl/extractor/cspan.py | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5aa4e3c6b..217971ec6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [cspan] Support Ustream embedded videos (#11547) + + version 2017.01.18 Extractors diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 7e5d4f227..92a827a4b 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -12,6 +12,7 @@ from ..utils import ( ExtractorError, ) from .senateisvp import SenateISVPIE +from .ustream import UstreamIE class CSpanIE(InfoExtractor): @@ -57,12 +58,30 @@ class CSpanIE(InfoExtractor): 'params': { 'skip_download': True, # m3u8 downloads } + }, { + # Ustream embedded video + 'url': 'https://www.c-span.org/video/?114917-1/armed-services', + 
'info_dict': { + 'id': '58428542', + 'ext': 'flv', + 'title': 'USHR07 Armed Services Committee', + 'description': 'hsas00-2118-20150204-1000et-07\n\n\nUSHR07 Armed Services Committee', + 'timestamp': 1423060374, + 'upload_date': '20150204', + 'uploader': 'HouseCommittee', + 'uploader_id': '12987475', + }, }] def _real_extract(self, url): video_id = self._match_id(url) video_type = None webpage = self._download_webpage(url, video_id) + + ustream_url = UstreamIE._extract_url(webpage) + if ustream_url: + return self.url_result(ustream_url, UstreamIE.ie_key()) + # We first look for clipid, because clipprog always appears before patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')] results = list(filter(None, (re.search(p, webpage) for p in patterns))) From 972efe60c3fdaff83f9b8e7a637ee81f4c27bb64 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 22:13:54 +0800 Subject: [PATCH 0049/1696] [generic] Remove a dead test The web page does not contain a video anymore Ref: #2694, #2696 --- youtube_dl/extractor/generic.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a7c104845..40201f311 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -589,17 +589,6 @@ class GenericIE(InfoExtractor): 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9', } }, - # Embedded Ustream video - { - 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm', - 'md5': '27b99cdb639c9b12a79bca876a073417', - 'info_dict': { - 'id': '45734260', - 'ext': 'flv', - 'uploader': 'AU SPA: The NSA and Privacy', - 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman' - } - }, # nowvideo embed hidden behind percent encoding { 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/', From f3c21cb7a7e2d8685f466368e3142739077498cf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 20 Jan 2017 22:25:20 +0800 Subject: [PATCH 0050/1696] [cspan] Fix _TESTS --- youtube_dl/extractor/cspan.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 92a827a4b..d4576160b 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -23,14 +23,13 @@ class CSpanIE(InfoExtractor): 'md5': '94b29a4f131ff03d23471dd6f60b6a1d', 'info_dict': { 'id': '315139', - 'ext': 'mp4', 'title': 'Attorney General Eric Holder on Voting Rights Act Decision', - 'description': 'Attorney General Eric Holder speaks to reporters following the Supreme Court decision in [Shelby County v. 
Holder], in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced.', }, + 'playlist_mincount': 2, 'skip': 'Regularly fails on travis, for unknown reasons', }, { 'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models', - 'md5': '8e5fbfabe6ad0f89f3012a7943c1287b', + # md5 is unstable 'info_dict': { 'id': 'c4486943', 'ext': 'mp4', @@ -39,14 +38,11 @@ class CSpanIE(InfoExtractor): } }, { 'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall', - 'md5': '2ae5051559169baadba13fc35345ae74', 'info_dict': { 'id': '342759', - 'ext': 'mp4', 'title': 'General Motors Ignition Switch Recall', - 'duration': 14848, - 'description': 'md5:118081aedd24bf1d3b68b3803344e7f3' }, + 'playlist_mincount': 6, }, { # Video from senate.gov 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers', From f4ec8dce481564589419e4dffc45437211daa13f Mon Sep 17 00:00:00 2001 From: Iulian Onofrei Date: Fri, 20 Jan 2017 18:25:04 +0200 Subject: [PATCH 0051/1696] Update README.md (#11787) Add audio format argument dependency warning --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 0b8c1671d..0d2ce8d15 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -751,7 +751,7 @@ def parseOpts(overrideArguments=None): help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') postproc.add_option( '--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default') + help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default; No effect without -x') postproc.add_option( '--audio-quality', metavar='QUALITY', dest='audioquality', default='5', From 12afdc2ad617dedfd7d60654b8c57b99604332ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 21 Jan 2017 18:10:32 +0700 Subject: [PATCH 0052/1696] [youtube] Extract episode metadata (closes #9695, closes #11774) --- youtube_dl/extractor/youtube.py | 37 +++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e6b840735..63597dd16 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -864,6 +864,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip_download': True, }, }, + { + # YouTube Red video with episode data + 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4', + 'info_dict': { + 'id': 'iqKdEhx-dD4', + 'ext': 'mp4', + 'title': 'Isolation - Mind Field (Ep 1)', + 'description': 'md5:3a72f23c086a1496c9e2c54a25fa0822', + 'upload_date': '20170118', + 'uploader': 'Vsauce', + 'uploader_id': 'Vsauce', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce', + 'license': 'Standard YouTube License', + 'series': 'Mind Field', + 'season_number': 1, + 'episode_number': 1, + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': [ + 'Skipping DASH manifest', + ], + }, { # itag 212 'url': '1t24XAntNCY', @@ -1454,6 +1478,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: video_alt_title = video_creator = None + m_episode = re.search( + r']+id="watch7-headline"[^>]*>\s*]*>.*?>(?P[^<]+)\s*S(?P\d+)\s*•\s*E(?P\d+)', + video_webpage) + if m_episode: + series = m_episode.group('series') + 
season_number = int(m_episode.group('season')) + episode_number = int(m_episode.group('episode')) + else: + series = season_number = episode_number = None + m_cat_container = self._search_regex( r'(?s)]*>\s*Category\s*\s*]*>(.*?)', video_webpage, 'categories', default=None) @@ -1743,6 +1777,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'is_live': is_live, 'start_time': start_time, 'end_time': end_time, + 'series': series, + 'season_number': season_number, + 'episode_number': episode_number, } From 04a3d4d23472ffa4a482d8ebf2d8fdbb3e974327 Mon Sep 17 00:00:00 2001 From: ha shao Date: Sat, 21 Jan 2017 15:47:39 +0800 Subject: [PATCH 0053/1696] [vimeo:channel] Extract videos' titles for playlist entries --- youtube_dl/extractor/vimeo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index add753635..a6bbd4c05 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -730,12 +730,12 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): # Try extracting href first since not all videos are available via # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729) clips = re.findall( - r'id="clip_(\d+)"[^>]*>\s*]+href="(/(?:[^/]+/)*\1)', webpage) + r'id="clip_(\d+)"[^>]*>\s*]+href="(/(?:[^/]+/)*\1)(?:[^>]+\btitle="([^"]+)")?', webpage) if clips: - for video_id, video_url in clips: + for video_id, video_url, video_title in clips: yield self.url_result( compat_urlparse.urljoin(base_url, video_url), - VimeoIE.ie_key(), video_id=video_id) + VimeoIE.ie_key(), video_id=video_id, video_title=video_title) # More relaxed fallback else: for video_id in re.findall(r'id=["\']clip_(\d+)', webpage): From 7c20b7484cc91a4818a98ca8d5b7ef94d5c38fb8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 22 Jan 2017 02:06:34 +0800 Subject: [PATCH 0054/1696] [nextmedia] Support redirected URLs --- ChangeLog | 1 + youtube_dl/extractor/nextmedia.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 217971ec6..00c8a063f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [nextmedia] Support redirected URLs + [cspan] Support Ustream embedded videos (#11547) diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index c900f232a..626ed8b49 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import parse_iso8601 @@ -30,6 +31,12 @@ class NextMediaIE(InfoExtractor): return self._extract_from_nextmedia_page(news_id, url, page) def _extract_from_nextmedia_page(self, news_id, url, page): + redirection_url = self._search_regex( + r'window\.location\.href\s*=\s*([\'"])(?P(?!\1).+)\1', + page, 'redirection URL', default=None, group='url') + if redirection_url: + return self.url_result(compat_urlparse.urljoin(url, redirection_url)) + title = self._fetch_title(page) video_url = self._search_regex(self._URL_PATTERN, page, 'video url') @@ -93,7 +100,7 @@ class NextMediaActionNewsIE(NextMediaIE): class AppleDailyIE(NextMediaIE): IE_DESC = '臺灣蘋果日報' - _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews|actionnews)/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' + _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/[^/]+/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' 
_TESTS = [{ 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694', 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d', @@ -157,6 +164,10 @@ class AppleDailyIE(NextMediaIE): }, { 'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/', 'only_matching': True, + }, { + # Redirected from http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694 + 'url': 'http://ent.appledaily.com.tw/section/article/headline/20150128/36354694', + 'only_matching': True, }] _URL_PATTERN = r'\{url: \'(.+)\'\}' From e84495cd8d7bdb89bbfe233263bd8ad0b448f8cc Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Sat, 21 Jan 2017 15:23:26 +0100 Subject: [PATCH 0055/1696] [azmedien] Add extractor (closes #11785) --- youtube_dl/extractor/azmedientv.py | 87 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 4 ++ 2 files changed, 91 insertions(+) create mode 100644 youtube_dl/extractor/azmedientv.py diff --git a/youtube_dl/extractor/azmedientv.py b/youtube_dl/extractor/azmedientv.py new file mode 100644 index 000000000..51d46fb94 --- /dev/null +++ b/youtube_dl/extractor/azmedientv.py @@ -0,0 +1,87 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .kaltura import KalturaIE +from ..utils import get_element_by_class + + +class AZMedienTVIE(InfoExtractor): + IE_DESC = 'telezueri.ch, telebaern.tv and telem1.ch videos' + _VALID_URL = r'http://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+(?:/[0-9]+-segment-(?:[^/#]+#)?|#)|#)(?P[^#]+)' + + _TESTS = [{ + # URL with 'segment' + 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom', + 'md5': 'fda85ada1299cee517a622bfbc5f6b66', + 'info_dict': { + 'id': '1_2444peh4', + 'ext': 'mov', + 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom', + 'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8', + 'uploader_id': 'TeleZ?ri', + 'upload_date': '20161218', + 'timestamp': 1482084490, + } + }, { + # URL with 'segment' and fragment: + 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger', + 'only_matching': True + }, { + # URL with 'episode' and fragment: + 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz', + 'only_matching': True + }, { + # URL with 'show' and fragment: + 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch', + 'only_matching': True + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + kaltura_partner_id = self._html_search_regex( + r']+src=["\']https?://www\.kaltura\.com/.*/partner_id/([0-9]+)', + webpage, 'Kaltura partner ID') + kaltura_entry_id = self._html_search_regex( + r']+data-id=["\'](.*?)["\'][^>]+data-slug=["\']%s' % video_id, + webpage, 'Kaltura entry ID') + + return self.url_result( + 'kaltura:%s:%s' % (kaltura_partner_id, kaltura_entry_id), + ie=KalturaIE.ie_key()) + + +class AZMedienTVShowIE(InfoExtractor): + IE_DESC = 'telezueri.ch, telebaern.tv and telem1.ch shows' + _VALID_URL = r'http://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/(?P[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+)?)$' + + _TESTS = [{ + # URL with 'episode': + 'url': 
'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016', + 'info_dict': { + 'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016', + 'title': 'News', + }, + 'playlist_count': 9, + }, { + # URL with 'show' only: + 'url': 'http://www.telezueri.ch/86-show-talktaeglich', + 'only_matching': True + }] + + def _real_extract(self, url): + show_id = self._match_id(url) + webpage = self._download_webpage(url, show_id) + + title = get_element_by_class('title-block-cell', webpage) + if title: + title = title.strip() + + entries = [self.url_result(m.group('url'), ie=AZMedienTVIE.ie_key()) for m in re.finditer( + r']+data-real=["\'](?P.+?)["\']', webpage)] + + return self.playlist_result( + entries, show_id, title) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9d0610d21..4cfb3c70f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -77,6 +77,10 @@ from .awaan import ( AWAANLiveIE, AWAANSeasonIE, ) +from .azmedientv import ( + AZMedienTVIE, + AZMedienTVShowIE, +) from .azubu import AzubuIE, AzubuLiveIE from .baidu import BaiduVideoIE from .bambuser import BambuserIE, BambuserChannelIE From 94629e537f2f6ed80b19e3863456f9ba8073af36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 Jan 2017 02:15:20 +0700 Subject: [PATCH 0056/1696] [azmedien] Improve (closes #11784) --- youtube_dl/extractor/azmedien.py | 132 +++++++++++++++++++++++++++++ youtube_dl/extractor/azmedientv.py | 87 ------------------- youtube_dl/extractor/extractors.py | 6 +- 3 files changed, 135 insertions(+), 90 deletions(-) create mode 100644 youtube_dl/extractor/azmedien.py delete mode 100644 youtube_dl/extractor/azmedientv.py diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py new file mode 100644 index 000000000..059dc6e4b --- /dev/null +++ b/youtube_dl/extractor/azmedien.py @@ -0,0 +1,132 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .kaltura import KalturaIE +from ..utils import ( + get_element_by_class, + strip_or_none, +) + + +class AZMedienBaseIE(InfoExtractor): + def _kaltura_video(self, partner_id, entry_id): + return self.url_result( + 'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(), + video_id=entry_id) + + +class AZMedienIE(AZMedienBaseIE): + IE_DESC = 'AZ Medien videos' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? 
+ (?: + telezueri\.ch| + telebaern\.tv| + telem1\.ch + )/ + [0-9]+-show-[^/\#]+ + (?: + /[0-9]+-episode-[^/\#]+ + (?: + /[0-9]+-segment-(?:[^/\#]+\#)?| + \# + )| + \# + ) + (?P[^\#]+) + ''' + + _TESTS = [{ + # URL with 'segment' + 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom', + 'info_dict': { + 'id': '1_2444peh4', + 'ext': 'mov', + 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom', + 'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8', + 'uploader_id': 'TeleZ?ri', + 'upload_date': '20161218', + 'timestamp': 1482084490, + }, + 'params': { + 'skip_download': True, + }, + }, { + # URL with 'segment' and fragment: + 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger', + 'only_matching': True + }, { + # URL with 'episode' and fragment: + 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz', + 'only_matching': True + }, { + # URL with 'show' and fragment: + 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch', + 'only_matching': True + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + partner_id = self._search_regex( + r']+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)', + webpage, 'kaltura partner id') + entry_id = self._html_search_regex( + r']+data-id=(["\'])(?P(?:(?!\1).)+)\1[^>]+data-slug=["\']%s' + % re.escape(video_id), webpage, 'kaltura entry id', group='id') + + return self._kaltura_video(partner_id, entry_id) + + +class AZMedienShowIE(AZMedienBaseIE): + IE_DESC = 'AZ Medien shows' + _VALID_URL = r'https?://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/(?P[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+)?)$' + + _TESTS = [{ + # URL with 'episode' + 'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016', + 'info_dict': { + 'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016', + 'title': 'News - Donnerstag, 15. 
Dezember 2016', + }, + 'playlist_count': 9, + }, { + # URL with 'show' only + 'url': 'http://www.telezueri.ch/86-show-talktaeglich', + 'only_matching': True + }] + + def _real_extract(self, url): + show_id = self._match_id(url) + webpage = self._download_webpage(url, show_id) + + entries = [] + + partner_id = self._search_regex( + r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)', + webpage, 'kaltura partner id', default=None) + + if partner_id: + entries = [ + self._kaltura_video(partner_id, m.group('id')) + for m in re.finditer( + r'data-id=(["\'])(?P(?:(?!\1).)+)\1', webpage)] + + if not entries: + entries = [ + self.url_result(m.group('url'), ie=AZMedienIE.ie_key()) + for m in re.finditer( + r']+data-real=(["\'])(?Phttp.+?)\1', webpage)] + + title = self._search_regex( + r'episodeShareTitle\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'title', + default=strip_or_none(get_element_by_class( + 'title-block-cell', webpage)), group='title') + + return self.playlist_result(entries, show_id, title) diff --git a/youtube_dl/extractor/azmedientv.py b/youtube_dl/extractor/azmedientv.py deleted file mode 100644 index 51d46fb94..000000000 --- a/youtube_dl/extractor/azmedientv.py +++ /dev/null @@ -1,87 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from .kaltura import KalturaIE -from ..utils import get_element_by_class - - -class AZMedienTVIE(InfoExtractor): - IE_DESC = 'telezueri.ch, telebaern.tv and telem1.ch videos' - _VALID_URL = r'http://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+(?:/[0-9]+-segment-(?:[^/#]+#)?|#)|#)(?P<id>[^#]+)' - - _TESTS = [{ - # URL with 'segment' - 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom', - 'md5': 'fda85ada1299cee517a622bfbc5f6b66', - 'info_dict': { - 'id': '1_2444peh4', - 'ext': 'mov', - 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom', - 'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8', - 'uploader_id': 'TeleZ?ri', - 'upload_date': '20161218', - 'timestamp': 1482084490, - } - }, { - # URL with 'segment' and fragment: - 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger', - 'only_matching': True - }, { - # URL with 'episode' and fragment: - 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz', - 'only_matching': True - }, { - # URL with 'show' and fragment: - 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch', - 'only_matching': True - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - kaltura_partner_id = self._html_search_regex( - r'<script[^>]+src=["\']https?://www\.kaltura\.com/.*/partner_id/([0-9]+)', - webpage, 'Kaltura partner ID') - kaltura_entry_id = self._html_search_regex( - r'<a[^>]+data-id=["\'](.*?)["\'][^>]+data-slug=["\']%s' % video_id, - webpage, 'Kaltura entry ID') - - return self.url_result( - 'kaltura:%s:%s' % (kaltura_partner_id, kaltura_entry_id), - ie=KalturaIE.ie_key()) - - -class AZMedienTVShowIE(InfoExtractor): - IE_DESC = 'telezueri.ch, telebaern.tv and telem1.ch shows' - _VALID_URL = 
r'http://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/(?P<id>[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+)?)$' - - _TESTS = [{ - # URL with 'episode': - 'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016', - 'info_dict': { - 'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016', - 'title': 'News', - }, - 'playlist_count': 9, - }, { - # URL with 'show' only: - 'url': 'http://www.telezueri.ch/86-show-talktaeglich', - 'only_matching': True - }] - - def _real_extract(self, url): - show_id = self._match_id(url) - webpage = self._download_webpage(url, show_id) - - title = get_element_by_class('title-block-cell', webpage) - if title: - title = title.strip() - - entries = [self.url_result(m.group('url'), ie=AZMedienTVIE.ie_key()) for m in re.finditer( - r'<a href=["\']#["\'][^>]+data-real=["\'](?P<url>.+?)["\']', webpage)] - - return self.playlist_result( - entries, show_id, title) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4cfb3c70f..de5f94738 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -77,9 +77,9 @@ from .awaan import ( AWAANLiveIE, AWAANSeasonIE, ) -from .azmedientv import ( - AZMedienTVIE, - AZMedienTVShowIE, +from .azmedien import ( + AZMedienIE, + AZMedienShowIE, ) from .azubu import AzubuIE, AzubuLiveIE from .baidu import BaiduVideoIE From 42697bab3c4d65a232054d5d5482cc177da12c72 Mon Sep 17 00:00:00 2001 From: einstein95 <einstein95@users.noreply.github.com> Date: Sun, 22 Jan 2017 02:00:38 +1300 Subject: [PATCH 0057/1696] [chaturbate] Fix extraction --- youtube_dl/extractor/chaturbate.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py index 29a8820d5..1c2f065df 100644 --- a/youtube_dl/extractor/chaturbate.py +++ b/youtube_dl/extractor/chaturbate.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ExtractorError @@ -31,30 +33,32 @@ class ChaturbateIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - m3u8_url = self._search_regex( - r'src=(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage, - 'playlist', default=None, group='url') + m3u8_urls = re.findall( + r'var hlsSource.+? 
= (["\'])(?P<url>http.+?\.m3u8)', webpage) - if not m3u8_url: + if not m3u8_urls: error = self._search_regex( [r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>', r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'], webpage, 'error', group='error', default=None) if not error: - if any(p not in webpage for p in ( + if any(p in webpage for p in ( self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')): error = self._ROOM_OFFLINE if error: raise ExtractorError(error, expected=True) raise ExtractorError('Unable to find stream URL') - formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + formats = [] + for m3u8_url in m3u8_urls: + formats.append(self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')[0]) + self._sort_formats(formats) return { 'id': video_id, 'title': self._live_title(video_id), - 'thumbnail': 'https://cdn-s.highwebmedia.com/uHK3McUtGCG3SMFcd4ZJsRv8/roomimage/%s.jpg' % video_id, + 'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id, 'age_limit': self._rta_search(webpage), 'is_live': True, 'formats': formats, From a243abb80d5fdaacc502bc5a2b5cb20d0766e93a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Jan 2017 03:00:10 +0700 Subject: [PATCH 0058/1696] [chaturbate] Improve (closes #11797) --- youtube_dl/extractor/chaturbate.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py index 1c2f065df..8fbc91c1f 100644 --- a/youtube_dl/extractor/chaturbate.py +++ b/youtube_dl/extractor/chaturbate.py @@ -33,10 +33,10 @@ class ChaturbateIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - m3u8_urls = re.findall( - r'var hlsSource.+? 
= (["\'])(?P<url>http.+?\.m3u8)', webpage) + m3u8_formats = [(m.group('id').lower(), m.group('url')) for m in re.finditer( + r'hlsSource(?P<id>.+?)\s*=\s*(?P<q>["\'])(?P<url>http.+?)(?P=q)', webpage)] - if not m3u8_urls: + if not m3u8_formats: error = self._search_regex( [r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>', r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'], @@ -50,9 +50,12 @@ class ChaturbateIE(InfoExtractor): raise ExtractorError('Unable to find stream URL') formats = [] - for m3u8_url in m3u8_urls: - formats.append(self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')[0]) - + for m3u8_id, m3u8_url in m3u8_formats: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', + # ffmpeg skips segments for fast m3u8 + preference=-10 if m3u8_id == 'fast' else None, + m3u8_id=m3u8_id, fatal=False, live=True)) self._sort_formats(formats) return { From 8d1fbe0cb20fdfab8487bb478c2a002f12c1a5d9 Mon Sep 17 00:00:00 2001 From: einstein95 <einstein95@users.noreply.github.com> Date: Sat, 21 Jan 2017 20:02:55 +1300 Subject: [PATCH 0059/1696] [pornflip] Add extractor (closes #11556) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/pornflip.py | 59 ++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 youtube_dl/extractor/pornflip.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index de5f94738..cfddf5b92 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -724,6 +724,7 @@ from .polskieradio import ( ) from .porn91 import Porn91IE from .porncom import PornComIE +from .pornflip import PornFlipIE from .pornhd import PornHdIE from .pornhub import ( PornHubIE, diff --git a/youtube_dl/extractor/pornflip.py b/youtube_dl/extractor/pornflip.py new file mode 100644 index 000000000..b6077f7cb --- /dev/null +++ b/youtube_dl/extractor/pornflip.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, +) +from ..utils import ( + int_or_none, + try_get, + RegexNotFoundError, +) + + +class PornFlipIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pornflip\.com/v/(?P<id>[0-9A-Za-z]{11})' + _TEST = { + 'url': 'https://www.pornflip.com/v/wz7DfNhMmep', + 'md5': '98c46639849145ae1fd77af532a9278c', + 'info_dict': { + 'id': 'wz7DfNhMmep', + 'ext': 'mp4', + 'title': '2 Amateurs swallow make his dream cumshots true', + 'uploader': 'figifoto', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + uploader = self._html_search_regex( + r'<span class="name">\s+<a class="ajax" href=".+>\s+<strong>([^<]+)<', webpage, 'uploader', fatal=False) + flashvars = compat_parse_qs(self._html_search_regex( + r'<embed.+?flashvars="([^"]+)"', + webpage, 'flashvars')) + title = flashvars['video_vars[title]'][0] + thumbnail = try_get(flashvars, lambda x: x['video_vars[big_thumb]'][0]) + formats = [] + for k, v in flashvars.items(): + height = self._search_regex(r'video_vars\[video_urls\]\[(\d+).+?\]', k, 'height', default=None) + if height: + url = v[0] + formats.append({ + 'height': int_or_none(height), + 'url': url + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'formats': formats, + 'title': title, + 'uploader': uploader, + 'thumbnail': thumbnail, + 'age_limit': 18, + } From 
271808b6b2bd75ec9bdf943a55dbc4737bfa6f81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Jan 2017 03:43:27 +0700 Subject: [PATCH 0060/1696] [pornflip] Improve and extract dash formats (closes #11795) --- youtube_dl/extractor/pornflip.py | 77 +++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/pornflip.py b/youtube_dl/extractor/pornflip.py index b6077f7cb..a4a5d390e 100644 --- a/youtube_dl/extractor/pornflip.py +++ b/youtube_dl/extractor/pornflip.py @@ -4,56 +4,89 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( compat_parse_qs, + compat_str, ) from ..utils import ( int_or_none, try_get, - RegexNotFoundError, + unified_timestamp, ) class PornFlipIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornflip\.com/v/(?P<id>[0-9A-Za-z]{11})' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z]{11})' + _TESTS = [{ 'url': 'https://www.pornflip.com/v/wz7DfNhMmep', 'md5': '98c46639849145ae1fd77af532a9278c', 'info_dict': { 'id': 'wz7DfNhMmep', 'ext': 'mp4', 'title': '2 Amateurs swallow make his dream cumshots true', - 'uploader': 'figifoto', 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 112, + 'timestamp': 1481655502, + 'upload_date': '20161213', + 'uploader_id': '106786', + 'uploader': 'figifoto', + 'view_count': int, 'age_limit': 18, } - } + }, { + 'url': 'https://www.pornflip.com/embed/wz7DfNhMmep', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - uploader = self._html_search_regex( - r'<span class="name">\s+<a class="ajax" href=".+>\s+<strong>([^<]+)<', webpage, 'uploader', fatal=False) - flashvars = compat_parse_qs(self._html_search_regex( - r'<embed.+?flashvars="([^"]+)"', - webpage, 'flashvars')) + + webpage = self._download_webpage( + 'https://www.pornflip.com/v/%s' % video_id, video_id) + + flashvars = compat_parse_qs(self._search_regex( + r'<embed[^>]+flashvars=(["\'])(?P<flashvars>(?:(?!\1).)+)\1', + webpage, 'flashvars', group='flashvars')) + title = flashvars['video_vars[title]'][0] - thumbnail = try_get(flashvars, lambda x: x['video_vars[big_thumb]'][0]) - formats = [] - for k, v in flashvars.items(): - height = self._search_regex(r'video_vars\[video_urls\]\[(\d+).+?\]', k, 'height', default=None) - if height: - url = v[0] - formats.append({ - 'height': int_or_none(height), - 'url': url - }) + def flashvar(kind): + return try_get( + flashvars, lambda x: x['video_vars[%s]' % kind][0], compat_str) + + formats = [] + for key, value in flashvars.items(): + if not (value and isinstance(value, list)): + continue + format_url = value[0] + if key == 'video_vars[hds_manifest]': + formats.extend(self._extract_mpd_formats( + format_url, video_id, mpd_id='dash', fatal=False)) + continue + height = self._search_regex( + r'video_vars\[video_urls\]\[(\d+)', key, 'height', default=None) + if not height: + continue + formats.append({ + 'url': format_url, + 'format_id': 'http-%s' % height, + 'height': int_or_none(height), + }) self._sort_formats(formats) + uploader = self._html_search_regex( + (r'<span[^>]+class="name"[^>]*>\s*<a[^>]+>\s*<strong>(?P<uploader>[^<]+)', + r'<meta[^>]+content=(["\'])[^>]*\buploaded by (?P<uploader>.+?)\1'), + webpage, 'uploader', fatal=False, group='uploader') + return { 'id': video_id, 'formats': formats, 'title': title, + 'thumbnail': flashvar('big_thumb'), + 'duration': 
int_or_none(flashvar('duration')), + 'timestamp': unified_timestamp(self._html_search_meta( + 'uploadDate', webpage, 'timestamp')), + 'uploader_id': flashvar('author_id'), 'uploader': uploader, - 'thumbnail': thumbnail, + 'view_count': int_or_none(flashvar('views')), 'age_limit': 18, } From 6c031a35f31717cc1a535d5d808b94967b841a93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Jan 2017 18:57:15 +0700 Subject: [PATCH 0061/1696] [ChangeLog] Actualize --- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index 00c8a063f..a814b934c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,16 @@ version <unreleased> Extractors ++ [pornflip] Add support for pornflip.com (#11556, #11795) +* [chaturbate] Fix extraction (#11797, #11802) ++ [azmedien] Add support for AZ Medien sites (#11784, #11785) + [nextmedia] Support redirected URLs ++ [vimeo:channel] Extract videos' titles for playlist entries (#11796) ++ [youtube] Extract episode metadata (#9695, #11774) + [cspan] Support Ustream embedded videos (#11547) ++ [1tv] Add support for HLS videos (#11786) +* [uol] Fix extraction (#11770) +* [mtv] Relax triforce feed regular expression (#11766) version 2017.01.18 From 9d5b29c881f679b1d4270326af4ba6f657807011 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Jan 2017 18:59:04 +0700 Subject: [PATCH 0062/1696] release 2017.01.22 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 2 +- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 38cb13a33..30cc27c7b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.22** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.18 +[debug] youtube-dl version 2017.01.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index a814b934c..beea17e54 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.01.22 Extractors + [pornflip] Add support for pornflip.com (#11556, #11795) diff --git a/README.md b/README.md index a606346b2..4f677d0cc 100644 --- a/README.md +++ b/README.md @@ -374,7 +374,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo avprobe) --audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; - "best" by default + "best" by default; No effect without -x --audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a3c76d5db..b906d443a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -74,6 +74,8 @@ - **awaan:live** - **awaan:season** - **awaan:video** + - **AZMedien**: AZ Medien videos + - **AZMedienShow**: AZ Medien shows - **Azubu** - **AzubuLive** - **BaiduVideo**: 百度视频 @@ -572,6 +574,7 @@ - **PolskieRadio** - **PolskieRadioCategory** - **PornCom** + - **PornFlip** - **PornHd** - **PornHub**: PornHub and Thumbzilla - **PornHubPlaylist** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 669f60f65..9466c9637 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.18' +__version__ = '2017.01.22' From 30dda24de304dd53fc63dfb5bf4672c2ec747014 Mon Sep 17 00:00:00 2001 From: Gaetan Gilbert <gaetan.gilbert@ens-lyon.fr> Date: Sun, 22 Jan 2017 20:27:38 +0100 Subject: [PATCH 0063/1696] [chirbit] Extract uploader --- youtube_dl/extractor/chirbit.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dl/extractor/chirbit.py index f35df143a..4815b34be 100644 --- a/youtube_dl/extractor/chirbit.py +++ b/youtube_dl/extractor/chirbit.py @@ -19,6 +19,7 @@ class ChirbitIE(InfoExtractor): 'title': 'md5:f542ea253f5255240be4da375c6a5d7e', 'description': 'md5:f24a4e22a71763e32da5fed59e47c770', 'duration': 306, + 'uploader': 'Gerryaudio', }, 'params': { 'skip_download': True, @@ -54,6 +55,9 @@ class ChirbitIE(InfoExtractor): duration = parse_duration(self._search_regex( r'class=["\']c-length["\'][^>]*>([^<]+)', webpage, 'duration', fatal=False)) + uploader = self._search_regex( + r'id=["\']chirbit-username["\'][^>]*>([^<]+)', + webpage, 'uploader', fatal=False) return { 'id': audio_id, @@ -61,6 +65,7 @@ class ChirbitIE(InfoExtractor): 'title': title, 'description': description, 'duration': duration, + 'uploader': uploader, } From 
a089545e036619a798aa19f33085f2b0b87a1b0a Mon Sep 17 00:00:00 2001 From: Alex Seiler <seileralex@gmail.com> Date: Sun, 22 Jan 2017 20:30:29 +0100 Subject: [PATCH 0064/1696] [azmedien:show] Improve _VALID_URL --- youtube_dl/extractor/azmedien.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py index 059dc6e4b..a89f71c20 100644 --- a/youtube_dl/extractor/azmedien.py +++ b/youtube_dl/extractor/azmedien.py @@ -85,7 +85,20 @@ class AZMedienIE(AZMedienBaseIE): class AZMedienShowIE(AZMedienBaseIE): IE_DESC = 'AZ Medien shows' - _VALID_URL = r'https?://(?:www\.)?(?:telezueri\.ch|telebaern\.tv|telem1\.ch)/(?P<id>[0-9]+-show-[^/#]+(?:/[0-9]+-episode-[^/#]+)?)$' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? + (?: + telezueri\.ch| + telebaern\.tv| + telem1\.ch + )/ + (?P<id>[0-9]+-show-[^/\#]+ + (?: + /[0-9]+-episode-[^/\#]+ + )? + )$ + ''' _TESTS = [{ # URL with 'episode' From 8bc0800d7cf24b17204f0fb3c6e76327ed8d527f Mon Sep 17 00:00:00 2001 From: Grzegorz P <Grzechooo@users.noreply.github.com> Date: Sun, 22 Jan 2017 20:35:38 +0100 Subject: [PATCH 0065/1696] [youtube:playlist] Fix nonexistent/private playlist detection (closes #11604) --- youtube_dl/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 63597dd16..644653357 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1998,7 +1998,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): url = self._TEMPLATE_URL % playlist_id page = self._download_webpage(url, playlist_id) - for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page): + # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604) + for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page): match = match.strip() # Check if the playlist exists or is private if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match): From 4201ba13e674788c36ae69fbfbffc4b246717d6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 Jan 2017 02:49:56 +0700 Subject: [PATCH 0066/1696] [youtube:playlist] Fix nonexistent/private playlist detection and skip private tests --- youtube_dl/extractor/youtube.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 644653357..5202beb3e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1856,6 +1856,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'title': 'YDL_Empty_List', }, 'playlist_count': 0, + 'skip': 'This playlist is private', }, { 'note': 'Playlist with deleted videos (#651). 
As a bonus, the video #51 is also twice in this list.', 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', @@ -1887,6 +1888,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl', }, 'playlist_count': 2, + 'skip': 'This playlist is private', }, { 'note': 'embedded', 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', @@ -2002,11 +2004,14 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page): match = match.strip() # Check if the playlist exists or is private - if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match): - raise ExtractorError( - 'The playlist doesn\'t exist or is private, use --username or ' - '--netrc to access it.', - expected=True) + mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match) + if mobj: + reason = mobj.group('reason') + message = 'This playlist %s' % reason + if 'private' in reason: + message += ', use --username or --netrc to access it' + message += '.' + raise ExtractorError(message, expected=True) elif re.match(r'[^<]*Invalid parameters[^<]*', match): raise ExtractorError( 'Invalid parameters. Maybe URL is incorrect.', From 6d119c2a6bdd2a987ef2e7553b357bd4a3f18690 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 Jan 2017 03:50:39 +0700 Subject: [PATCH 0067/1696] [24video] Fix extraction (closes #11811) --- youtube_dl/extractor/twentyfourvideo.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index 1093a3829..a983ebf05 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -12,7 +12,7 @@ from ..utils import ( class TwentyFourVideoIE(InfoExtractor): IE_NAME = '24video' - _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.24video.net/video/view/1044982', @@ -43,7 +43,7 @@ class TwentyFourVideoIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://www.24video.net/video/view/%s' % video_id, video_id) + 'http://www.24video.sex/video/view/%s' % video_id, video_id) title = self._og_search_title(webpage) description = self._html_search_regex( @@ -69,11 +69,11 @@ class TwentyFourVideoIE(InfoExtractor): # Sets some cookies self._download_xml( - r'http://www.24video.net/video/xml/%s?mode=init' % video_id, + r'http://www.24video.sex/video/xml/%s?mode=init' % video_id, video_id, 'Downloading init XML') video_xml = self._download_xml( - 'http://www.24video.net/video/xml/%s?mode=play' % video_id, + 'http://www.24video.sex/video/xml/%s?mode=play' % video_id, video_id, 'Downloading video XML') video = xpath_element(video_xml, './/video', 'video', fatal=True) From 0c1c6f4b9f97375ffc68cbc9c7276838f7bf8514 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 23 Jan 2017 23:31:43 +0800 Subject: [PATCH 0068/1696] [utils] Add another date format seen in NextTV --- youtube_dl/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 12863e74a..98acc2b45 100644 --- 
a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -143,6 +143,7 @@ DATE_FORMATS = ( '%Y/%m/%d', '%Y/%m/%d %H:%M', '%Y/%m/%d %H:%M:%S', + '%Y-%m-%d %H:%M', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M:%S.%f', '%d.%m.%Y %H:%M', From bc35ed3fb6fcae88d59fd440b505b9e1a7cf112e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 23 Jan 2017 23:33:30 +0800 Subject: [PATCH 0069/1696] =?UTF-8?q?[nextmedia]=20Add=20support=20for=20N?= =?UTF-8?q?extTV=20(=E5=A3=B9=E9=9B=BB=E8=A6=96)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog | 6 ++++ youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nextmedia.py | 54 +++++++++++++++++++++++++++++- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index beea17e54..ba2f5cffc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors ++ [nextmedia] Add support for NextTV (壹電視) + + version 2017.01.22 Extractors diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index cfddf5b92..e23b5d0f6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -598,6 +598,7 @@ from .nextmedia import ( NextMediaIE, NextMediaActionNewsIE, AppleDailyIE, + NextTVIE, ) from .nfb import NFBIE from .nfl import NFLIE diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index 626ed8b49..680f03aad 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -3,7 +3,14 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_urlparse -from ..utils import parse_iso8601 +from ..utils import ( + clean_html, + get_element_by_class, + int_or_none, + parse_iso8601, + remove_start, + unified_timestamp, +) class NextMediaIE(InfoExtractor): @@ -184,3 +191,48 @@ class AppleDailyIE(NextMediaIE): def _fetch_description(self, page): return self._html_search_meta('description', page, 'news description') + + +class NextTVIE(InfoExtractor): + IE_DESC = '壹電視' + _VALID_URL = r'https?://(?:www\.)?nexttv\.com\.tw/(?:[^/]+/)+(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.nexttv.com.tw/news/realtime/politics/11779671', + 'info_dict': { + 'id': '11779671', + 'ext': 'mp4', + 'title': '「超收稅」近4千億! 
藍議員籲發消費券', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1484825400, + 'upload_date': '20170119', + 'view_count': int, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex( + r'<h1[^>]*>([^<]+)</h1>', webpage, 'title') + + data = self._hidden_inputs(webpage) + + video_url = data['ntt-vod-src-detailview'] + + date_str = get_element_by_class('date', webpage) + timestamp = unified_timestamp(date_str + '+0800') if date_str else None + + view_count = int_or_none(remove_start( + clean_html(get_element_by_class('click', webpage)), '點閱:')) + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + 'thumbnail': data.get('ntt-vod-img-src'), + 'timestamp': timestamp, + 'view_count': view_count, + } From b494d6856c55bd351107fd7266f8ac2eeaee341f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jan 2017 02:50:49 +0700 Subject: [PATCH 0070/1696] [pluralsight] Fix extraction (closes #11820) --- youtube_dl/extractor/pluralsight.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 0ffd41ecd..5c798e874 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -157,13 +157,10 @@ class PluralsightIE(PluralsightBaseIE): display_id = '%s-%s' % (name, clip_id) - parsed_url = compat_urlparse.urlparse(url) - - payload_url = compat_urlparse.urlunparse(parsed_url._replace( - netloc='app.pluralsight.com', path='player/api/v1/payload')) - course = self._download_json( - payload_url, display_id, headers={'Referer': url})['payload']['course'] + 'https://app.pluralsight.com/player/user/api/v1/player/payload', + display_id, data=urlencode_postdata({'courseId': course_name}), + headers={'Referer': url}) collection = course['modules'] From ee4c091ce5bb3732c3016410230f45f2283e5055 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jan 2017 02:56:19 +0700 Subject: [PATCH 0071/1696] [ChangeLog] Actualize --- ChangeLog | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ChangeLog b/ChangeLog index ba2f5cffc..406301549 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,11 @@ version <unreleased> Extractors +* [pluralsight] Fix extraction (#11820) + [nextmedia] Add support for NextTV (壹電視) +* [24video] Fix extraction (#11811) +* [youtube:playlist] Fix nonexistent and private playlist detection (#11604) ++ [chirbit] Extract uploader (#11809) version 2017.01.22 From c3a65c3de0667b8de4af8fdc8c1eb04a1498e104 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jan 2017 02:58:37 +0700 Subject: [PATCH 0072/1696] release 2017.01.24 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 30cc27c7b..f771d72c0 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.22** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.24** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.22 +[debug] youtube-dl version 2017.01.24 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 406301549..4bc30cff7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.01.24 Extractors * [pluralsight] Fix extraction (#11820) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index b906d443a..2d28b3f72 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -485,6 +485,7 @@ - **Newstube** - **NextMedia**: 蘋果日報 - **NextMediaActionNews**: 蘋果日報 - 動新聞 + - **NextTV**: 壹電視 - **nfb**: National Film Board of Canada - **nfl.com** - **NhkVod** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 9466c9637..8a66c2fb9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.22' +__version__ = '2017.01.24' From d61aa5eb37244a04caa09f1f238a4f81366c109b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jan 2017 22:46:40 +0700 Subject: [PATCH 0073/1696] [vimeo:review] Fix config URL extraction (closes #11821) --- youtube_dl/extractor/vimeo.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index a6bbd4c05..c12eeadd4 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -884,10 +884,14 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): def _get_config_url(self, webpage_url, video_id, video_password_verified=False): webpage = self._download_webpage(webpage_url, video_id) - data = self._parse_json(self._search_regex( - r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data', - default=NO_DEFAULT if video_password_verified else '{}'), video_id) - config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl') + config_url = self._html_search_regex( + r'data-config-url=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, + 'config URL', default=None, group='url') + if not config_url: + data = self._parse_json(self._search_regex( + r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data', + default=NO_DEFAULT if video_password_verified else '{}'), video_id) + config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl') if config_url is None: self._verify_video_password(webpage_url, video_id, webpage) config_url = 
self._get_config_url( From 74af9c700d308e3638db0ff2e4510770f9daf31c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jan 2017 22:55:49 +0700 Subject: [PATCH 0074/1696] [konserthusetplay] Add support for hls formats (closes #11823) --- youtube_dl/extractor/konserthusetplay.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/konserthusetplay.py b/youtube_dl/extractor/konserthusetplay.py index 55291c66f..7e6ea9696 100644 --- a/youtube_dl/extractor/konserthusetplay.py +++ b/youtube_dl/extractor/konserthusetplay.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + determine_ext, float_or_none, int_or_none, ) @@ -42,12 +43,18 @@ class KonserthusetPlayIE(InfoExtractor): player_config = media['playerconfig'] playlist = player_config['playlist'] - source = next(f for f in playlist if f.get('bitrates')) + source = next(f for f in playlist if f.get('bitrates') or f.get('provider')) FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4' formats = [] + m3u8_url = source.get('url') + if m3u8_url and determine_ext(m3u8_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + fallback_url = source.get('fallbackUrl') fallback_format_id = None if fallback_url: From 23b35a634e06d9b92c9650b0d66a3d5d7eb03a54 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 24 Jan 2017 16:55:07 +0100 Subject: [PATCH 0075/1696] [crackle] improve extraction - extract vtt subtitles - extract multiple resolutions for thumbnails - pass geo verification proxy headers - add support for mobile urls --- youtube_dl/extractor/crackle.py | 53 ++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py index 25c5e7d04..377fb45e9 100644 --- a/youtube_dl/extractor/crackle.py +++ b/youtube_dl/extractor/crackle.py @@ -6,7 +6,7 @@ from ..utils import int_or_none class CrackleIE(InfoExtractor): - _VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' + _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' _TEST = { 'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934', 'info_dict': { @@ -31,8 +31,32 @@ class CrackleIE(InfoExtractor): } } + _THUMBNAIL_RES = [ + (120, 90), + (208, 156), + (220, 124), + (220, 220), + (240, 180), + (250, 141), + (315, 236), + (320, 180), + (360, 203), + (400, 300), + (421, 316), + (460, 330), + (460, 460), + (462, 260), + (480, 270), + (587, 330), + (640, 480), + (700, 330), + (700, 394), + (854, 480), + (1024, 1024), + (1920, 1080), + ] + # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx - _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614' _MEDIA_FILE_SLOTS = { 'c544.flv': { 'width': 544, @@ -61,17 +85,25 @@ class CrackleIE(InfoExtractor): item = self._download_xml( 'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id, - video_id).find('i') + video_id, headers=self.geo_verification_headers()).find('i') title = item.attrib['t'] subtitles = {} formats = self._extract_m3u8_formats( 'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id), video_id, 'mp4', m3u8_id='hls', 
fatal=None) - thumbnail = None + thumbnails = [] path = item.attrib.get('p') if path: - thumbnail = self._THUMBNAIL_TEMPLATE % path + for width, height in self._THUMBNAIL_RES: + res = '%dx%d' % (width, height) + thumbnails.append({ + 'id': res, + 'url': 'http://images-us-am.crackle.com/%stnl_%s.jpg' % (path, res), + 'width': width, + 'height': height, + 'resolution': res, + }) http_base_url = 'http://ahttp.crackle.com/' + path for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items(): formats.append({ @@ -86,10 +118,11 @@ class CrackleIE(InfoExtractor): if locale and v: if locale not in subtitles: subtitles[locale] = [] - subtitles[locale] = [{ - 'url': '%s/%s%s_%s.xml' % (config_doc.attrib['strSubtitleServer'], path, locale, v), - 'ext': 'ttml', - }] + for url_ext, ext in (('vtt', 'vtt'), ('xml', 'tt')): + subtitles.setdefault(locale, []).append({ + 'url': '%s/%s%s_%s.%s' % (config_doc.attrib['strSubtitleServer'], path, locale, v, url_ext), + 'ext': ext, + }) self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) return { @@ -100,7 +133,7 @@ class CrackleIE(InfoExtractor): 'series': item.attrib.get('sn'), 'season_number': int_or_none(item.attrib.get('se')), 'episode_number': int_or_none(item.attrib.get('ep')), - 'thumbnail': thumbnail, + 'thumbnails': thumbnails, 'subtitles': subtitles, 'formats': formats, } From af59bddc4e4a6c260e7966fe75d9d687c3b13b32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jan 2017 23:02:20 +0700 Subject: [PATCH 0076/1696] [konserthusetplay] Extract subtitles (#11823) --- youtube_dl/extractor/konserthusetplay.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/konserthusetplay.py b/youtube_dl/extractor/konserthusetplay.py index 7e6ea9696..3ae2aa317 100644 --- a/youtube_dl/extractor/konserthusetplay.py +++ b/youtube_dl/extractor/konserthusetplay.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( determine_ext, float_or_none, @@ -104,6 +105,13 @@ class KonserthusetPlayIE(InfoExtractor): thumbnail = media.get('image') duration = float_or_none(media.get('duration'), 1000) + subtitles = {} + captions = source.get('captionsAvailableLanguages') + if isinstance(captions, dict): + for lang, subtitle_url in captions.items(): + if lang != 'none' and isinstance(subtitle_url, compat_str): + subtitles.setdefault(lang, []).append({'url': subtitle_url}) + return { 'id': video_id, 'title': title, @@ -111,4 +119,5 @@ class KonserthusetPlayIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, + 'subtitles': subtitles, } From c60089c0222433775dcc1305d85b42fc6158c8df Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 25 Jan 2017 07:38:17 +0100 Subject: [PATCH 0077/1696] [afreecatv:global] Add new extractor(closes #11807) --- youtube_dl/extractor/afreecatv.py | 92 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 96 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 75b366993..4f6cdb8a2 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -18,6 +18,7 @@ from ..utils import ( class AfreecaTVIE(InfoExtractor): + IE_NAME = 'afreecatv' IE_DESC = 'afreecatv.com' _VALID_URL = r'''(?x) https?:// @@ -143,3 +144,94 @@ class AfreecaTVIE(InfoExtractor): expected=True) return info + + +class 
AfreecaTVGlobalIE(AfreecaTVIE): + IE_NAME = 'afreecatv:global' + _VALID_URL = r'https?://(?:www\.)?afreeca\.tv/(?P<channel_id>\d+)(?:/v/(?P<video_id>\d+))?' + _TESTS = [{ + 'url': 'http://afreeca.tv/36853014/v/58301', + 'info_dict': { + 'id': '58301', + 'title': 'tryhard top100', + 'uploader_id': '36853014', + 'uploader': 'makgi Hearthstone Live!', + }, + 'playlist_count': 3, + }] + + def _real_extract(self, url): + channel_id, video_id = re.match(self._VALID_URL, url).groups() + video_type = 'video' if video_id else 'live' + query = { + 'pt': 'view', + 'bid': channel_id, + } + if video_id: + query['vno'] = video_id + video_data = self._download_json( + 'http://api.afreeca.tv/%s/view_%s.php' % (video_type, video_type), + video_id or channel_id, query=query)['channel'] + + if video_data.get('result') != 1: + raise ExtractorError('%s said: %s' % (self.IE_NAME, video_data['remsg'])) + + title = video_data['title'] + + info = { + 'thumbnail': video_data.get('thumb'), + 'view_count': int_or_none(video_data.get('vcnt')), + 'age_limit': int_or_none(video_data.get('grade')), + 'uploader_id': channel_id, + 'uploader': video_data.get('cname'), + } + + if video_id: + entries = [] + for i, f in enumerate(video_data.get('flist', [])): + video_key = self.parse_video_key(f.get('key', '')) + f_url = f.get('file') + if not video_key or not f_url: + continue + entries.append({ + 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)), + 'title': title, + 'upload_date': video_key.get('upload_date'), + 'duration': int_or_none(f.get('length')), + 'url': f_url, + 'protocol': 'm3u8_native', + 'ext': 'mp4', + }) + + info.update({ + 'id': video_id, + 'title': title, + 'duration': int_or_none(video_data.get('length')), + }) + if len(entries) > 1: + info['_type'] = 'multi_video' + info['entries'] = entries + elif len(entries) == 1: + i = entries[0].copy() + i.update(info) + info = i + else: + formats = [] + for s in video_data.get('strm', []): + s_url = s.get('purl') + if not s_url: + continue + # TODO: extract rtmp formats + if s.get('stype') == 'HLS': + formats.extend(self._extract_m3u8_formats( + s_url, channel_id, 'mp4', fatal=False)) + self._sort_formats(formats) + + info.update({ + 'id': channel_id, + 'title': self._live_title(title), + 'is_live': True, + 'formats': formats, + }) + + return info diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e23b5d0f6..f09b4cf2c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -30,7 +30,10 @@ from .aenetworks import ( AENetworksIE, HistoryTopicIE, ) -from .afreecatv import AfreecaTVIE +from .afreecatv import ( + AfreecaTVIE, + AfreecaTVGlobalIE, +) from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE From b8a03b66601f6af9e6b4009cba634dac6e0d30e6 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 25 Jan 2017 07:39:11 +0100 Subject: [PATCH 0078/1696] [srgssr] fix rts video extraction(closes #11831) --- youtube_dl/extractor/srgssr.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index 47aa887cc..319a48a7a 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ -48,9 +48,6 @@ class SRGSSRIE(InfoExtractor): def _real_extract(self, url): bu, media_type, media_id = re.match(self._VALID_URL, url).groups() - if bu == 'rts': - return self.url_result('rts:%s' % media_id, 'RTS') - media_data = self.get_media_data(bu, 
media_type, media_id) metadata = media_data['AssetMetadatas']['AssetMetadata'][0] From 17f8deeb481a7aa3079d7e11da2c255f893b9e8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 25 Jan 2017 23:27:22 +0700 Subject: [PATCH 0079/1696] [extractor/generic] Add support for openload embeds (closes #11536, closes #11812) --- youtube_dl/extractor/generic.py | 7 +++++++ youtube_dl/extractor/openload.py | 8 ++++++++ 2 files changed, 15 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 40201f311..a23486620 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -80,6 +80,7 @@ from .piksel import PikselIE from .videa import VideaIE from .twentymin import TwentyMinutenIE from .ustream import UstreamIE +from .openload import OpenloadIE class GenericIE(InfoExtractor): @@ -2431,6 +2432,12 @@ class GenericIE(InfoExtractor): return _playlist_from_matches( twentymin_urls, ie=TwentyMinutenIE.ie_key()) + # Look for Openload embeds + openload_urls = OpenloadIE._extract_urls(webpage) + if openload_urls: + return _playlist_from_matches( + openload_urls, ie=OpenloadIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 3d4ad7dca..4893ade5d 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_chr from ..utils import ( @@ -56,6 +58,12 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe[^>]+src=["\']((?:https?://)?(?:openload\.(?:co|io)|oload\.tv)/embed/[a-zA-Z0-9-_]+)', + webpage) + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id) From c1fa3f46727ccbbb75389ce82753f2e63449ece6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 25 Jan 2017 23:28:45 +0700 Subject: [PATCH 0080/1696] [openload] Fallback video extension to mp4 --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 4893ade5d..32289d897 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -101,7 +101,7 @@ class OpenloadIE(InfoExtractor): 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'url': video_url, # Seems all videos have extensions in their titles - 'ext': determine_ext(title), + 'ext': determine_ext(title, 'mp4'), 'subtitles': subtitles, } return info_dict From 2c302cf66b235aed6be5786489f259c0fa993fae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 25 Jan 2017 23:33:46 +0700 Subject: [PATCH 0081/1696] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4bc30cff7..e0af3f671 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version <unreleased> + +Extractors ++ [openload] Fallback video extension to mp4 ++ [extractor/generic] Add support for Openload embeds (#11536, #11812) +* [srgssr] Fix rts video extraction (#11831) ++ [afreecatv:global] Add support for 
afreeca.tv (#11807) ++ [crackle] Extract vtt subtitles ++ [crackle] Extract multiple resolutions for thumbnails ++ [crackle] Add support for mobile URLs ++ [konserthusetplay] Extract subtitles (#11823) ++ [konserthusetplay] Add support for HLS videos (#11823) +* [vimeo:review] Fix config URL extraction (#11821) + + version 2017.01.24 Extractors From 2417d41535a907a2da05a8b6490198916279d2ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 25 Jan 2017 23:36:03 +0700 Subject: [PATCH 0082/1696] release 2017.01.25 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f771d72c0..4d409f785 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.24** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.25*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.25** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.24 +[debug] youtube-dl version 2017.01.25 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e0af3f671..ff305d7e8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.01.25 Extractors + [openload] Fallback video extension to mp4 diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2d28b3f72..f640cfcaa 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -33,7 +33,8 @@ - **AdobeTVVideo** - **AdultSwim** - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network - - **AfreecaTV**: afreecatv.com + - **afreecatv**: afreecatv.com + - **afreecatv:global**: afreecatv.com - **AirMozilla** - **AlJazeera** - **Allocine** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8a66c2fb9..c23fe85de 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.24' +__version__ = '2017.01.25' From 556dbe7fe35667cb061dbf0ee84d3a065ad11055 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 26 Jan 2017 21:43:14 +0700 Subject: [PATCH 0083/1696] [youtube] Add fallback for 
duration extraction (closes #11841) --- youtube_dl/extractor/youtube.py | 36 ++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5202beb3e..630586796 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -40,6 +40,7 @@ from ..utils import ( sanitized_Request, smuggle_url, str_to_int, + try_get, unescapeHTML, unified_strdate, unsmuggle_url, @@ -383,6 +384,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 'categories': ['Science & Technology'], 'tags': ['youtube-dl'], + 'duration': 10, 'like_count': int, 'dislike_count': int, 'start_time': 1, @@ -402,6 +404,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli', 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop', 'iconic ep', 'iconic', 'love', 'it'], + 'duration': 180, 'uploader': 'Icona Pop', 'uploader_id': 'IconaPop', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop', @@ -419,6 +422,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'Justin Timberlake - Tunnel Vision (Explicit)', 'alt_title': 'Tunnel Vision', 'description': 'md5:64249768eec3bc4276236606ea996373', + 'duration': 419, 'uploader': 'justintimberlakeVEVO', 'uploader_id': 'justintimberlakeVEVO', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO', @@ -458,6 +462,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 'categories': ['Science & Technology'], 'tags': ['youtube-dl'], + 'duration': 10, 'like_count': int, 'dislike_count': int, }, @@ -493,6 +498,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'm4a', 'title': 'Afrojack, Spree Wilson - The Spark ft. 
Spree Wilson', 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d', + 'duration': 244, 'uploader': 'AfrojackVEVO', 'uploader_id': 'AfrojackVEVO', 'upload_date': '20131011', @@ -512,6 +518,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'Taylor Swift - Shake It Off', 'alt_title': 'Shake It Off', 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3', + 'duration': 242, 'uploader': 'TaylorSwiftVEVO', 'uploader_id': 'TaylorSwiftVEVO', 'upload_date': '20140818', @@ -529,6 +536,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': 'T4XJQO3qol8', 'ext': 'mp4', + 'duration': 219, 'upload_date': '20100909', 'uploader': 'The Amazing Atheist', 'uploader_id': 'TheAmazingAtheist', @@ -546,6 +554,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer', 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}', + 'duration': 142, 'uploader': 'The Witcher', 'uploader_id': 'WitcherGame', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame', @@ -562,6 +571,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'Dedication To My Ex (Miss That) (Lyric Video)', 'description': 'md5:33765bb339e1b47e7e72b5490139bb41', + 'duration': 247, 'uploader': 'LloydVEVO', 'uploader_id': 'LloydVEVO', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO', @@ -576,6 +586,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': '__2ABJjxzNo', 'ext': 'mp4', + 'duration': 266, 'upload_date': '20100430', 'uploader_id': 'deadmau5', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5', @@ -596,6 +607,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': 'lqQg6PlCWgI', 'ext': 'mp4', + 'duration': 6085, 'upload_date': '20150827', 'uploader_id': 'olympic', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic', @@ -615,6 +627,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': '_b-2C3KPAM0', 'ext': 'mp4', 'stretched_ratio': 16 / 9., + 'duration': 85, 'upload_date': '20110310', 'uploader_id': 'AllenMeow', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow', @@ -649,6 +662,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'md5:7b81415841e02ecd4313668cde88737a', 'description': 'md5:116377fd2963b81ec4ce64b542173306', + 'duration': 220, 'upload_date': '20150625', 'uploader_id': 'dorappi2000', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000', @@ -691,6 +705,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)', 'description': 'md5:dc7872fb300e143831327f1bae3af010', + 'duration': 7335, 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', @@ -703,6 +718,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)', 'description': 'md5:dc7872fb300e143831327f1bae3af010', + 'duration': 7337, 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', @@ -715,6 +731,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'teamPGP: Rocket League Noob Stream (grizzle)', 'description': 'md5:dc7872fb300e143831327f1bae3af010', + 'duration': 7337, 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', @@ -727,6 +744,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'teamPGP: Rocket League Noob Stream (zim)', 
'description': 'md5:dc7872fb300e143831327f1bae3af010', + 'duration': 7334, 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', @@ -768,6 +786,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21', 'alt_title': 'Dark Walk', 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a', + 'duration': 133, 'upload_date': '20151119', 'uploader_id': 'IronSoulElf', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf', @@ -809,10 +828,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'md5:e41008789470fc2533a3252216f1c1d1', 'description': 'md5:a677553cf0840649b731a3024aeff4cc', + 'duration': 721, 'upload_date': '20150127', 'uploader_id': 'BerkmanCenter', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter', - 'uploader': 'BerkmanCenter', + 'uploader': 'The Berkman Klein Center for Internet & Society', 'license': 'Creative Commons Attribution license (reuse allowed)', }, 'params': { @@ -827,6 +847,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders', 'description': 'md5:dda0d780d5a6e120758d1711d062a867', + 'duration': 4060, 'upload_date': '20151119', 'uploader': 'Bernie 2016', 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg', @@ -871,7 +892,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': 'iqKdEhx-dD4', 'ext': 'mp4', 'title': 'Isolation - Mind Field (Ep 1)', - 'description': 'md5:3a72f23c086a1496c9e2c54a25fa0822', + 'description': 'md5:8013b7ddea787342608f63a13ddc9492', + 'duration': 2085, 'upload_date': '20170118', 'uploader': 'Vsauce', 'uploader_id': 'Vsauce', @@ -1516,11 +1538,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_subtitles = self.extract_subtitles(video_id, video_webpage) automatic_captions = self.extract_automatic_captions(video_id, video_webpage) - if 'length_seconds' not in video_info: - self._downloader.report_warning('unable to extract video duration') - video_duration = None - else: - video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0])) + video_duration = try_get( + video_info, lambda x: int_or_none(x['length_seconds'][0])) + if not video_duration: + video_duration = parse_duration(self._html_search_meta( + 'duration', video_webpage, 'video duration')) # annotations video_annotations = None From cf0cabbe5011228c78a3d88c1a1b179b10333d6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 26 Jan 2017 21:49:34 +0700 Subject: [PATCH 0084/1696] [cmt,mtv,southpark] Add support for episode URLs (closes #11837) --- youtube_dl/extractor/cmt.py | 2 +- youtube_dl/extractor/mtv.py | 5 ++++- youtube_dl/extractor/southpark.py | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py index f6b794fb3..e701fbeab 100644 --- a/youtube_dl/extractor/cmt.py +++ b/youtube_dl/extractor/cmt.py @@ -5,7 +5,7 @@ from .mtv import MTVIE class CMTIE(MTVIE): IE_NAME = 'cmt.com' - _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes|video-clips)/(?P<id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|(?:full-)?episodes|video-clips)/(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index e48ea2481..855c3996f 100644 --- a/youtube_dl/extractor/mtv.py 
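
The duration fallback added to the YouTube extractor above tries length_seconds from the video_info mapping first and only then parses the page's duration meta tag. A minimal standalone sketch of the same two-step lookup, built on youtube-dl's own helpers; the sample values are illustrative, not taken from a real watch page.

    from youtube_dl.utils import int_or_none, parse_duration, try_get

    video_info = {'length_seconds': ['219']}  # shape of the video_info mapping (illustrative)

    video_duration = try_get(
        video_info, lambda x: int_or_none(x['length_seconds'][0]))
    if not video_duration:
        # e.g. a <meta itemprop="duration" content="PT3M39S"> value from the watch page
        video_duration = parse_duration('PT3M39S')
    print(video_duration)  # 219
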
+++ b/youtube_dl/extractor/mtv.py @@ -304,7 +304,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): class MTVIE(MTVServicesInfoExtractor): IE_NAME = 'mtv' - _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|full-episodes)/(?P<id>[^/?#.]+)' + _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)' _FEED_URL = 'http://www.mtv.com/feeds/mrss/' _TESTS = [{ @@ -321,6 +321,9 @@ class MTVIE(MTVServicesInfoExtractor): }, { 'url': 'http://www.mtv.com/full-episodes/94tujl/unlocking-the-truth-gates-of-hell-season-1-ep-101', 'only_matching': True, + }, { + 'url': 'http://www.mtv.com/episodes/g8xu7q/teen-mom-2-breaking-the-wall-season-7-ep-713', + 'only_matching': True, }] diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index 08f8c5744..d8ce416fc 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -6,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor class SouthParkIE(MTVServicesInfoExtractor): IE_NAME = 'southpark.cc.com' - _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|(?:full-)?episodes)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' @@ -75,7 +75,7 @@ class SouthParkDeIE(SouthParkIE): class SouthParkNlIE(SouthParkIE): IE_NAME = 'southpark.nl' - _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|(?:full-)?episodes)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/' _TESTS = [{ From 9bccdc7004f48963da9a51b6fe24a398d59da725 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 26 Jan 2017 16:06:01 +0100 Subject: [PATCH 0085/1696] [vevo] remove request to old api and catch apiv2 errors --- youtube_dl/extractor/vevo.py | 267 +++++++++++------------------------ 1 file changed, 79 insertions(+), 188 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index f0a8075fb..c4e37f694 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -4,9 +4,9 @@ import re from .common import InfoExtractor from ..compat import ( - compat_etree_fromstring, compat_str, compat_urlparse, + compat_HTTPError, ) from ..utils import ( ExtractorError, @@ -140,21 +140,6 @@ class VevoIE(VevoBaseIE): 'url': 'http://www.vevo.com/watch/INS171400764', 'only_matching': True, }] - _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com' - _SOURCE_TYPES = { - 0: 'youtube', - 1: 'brightcove', - 2: 'http', - 3: 'hls_ios', - 4: 'hls', - 5: 'smil', # http - 7: 'f4m_cc', - 8: 'f4m_ak', - 9: 'f4m_l3', - 10: 'ism', - 13: 'smil', # rtmp - 18: 'dash', - } _VERSIONS = { 0: 'youtube', # only in AuthenticateVideo videoVersions 1: 'level3', @@ -163,41 +148,6 @@ class VevoIE(VevoBaseIE): 4: 'amazon', } - def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): - formats = [] - els = smil.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') - for el in els: - src = el.attrib['src'] - m = re.match(r'''(?xi) - (?P<ext>[a-z0-9]+): - (?P<path> - [/a-z0-9]+ # The directory and main part of the URL - _(?P<tbr>[0-9]+)k - _(?P<width>[0-9]+)x(?P<height>[0-9]+) - _(?P<vcodec>[a-z0-9]+) - _(?P<vbr>[0-9]+) - _(?P<acodec>[a-z0-9]+) - _(?P<abr>[0-9]+) - \.[a-z0-9]+ # File extension - 
)''', src) - if not m: - continue - - format_url = self._SMIL_BASE_URL + m.group('path') - formats.append({ - 'url': format_url, - 'format_id': 'smil_' + m.group('tbr'), - 'vcodec': m.group('vcodec'), - 'acodec': m.group('acodec'), - 'tbr': int(m.group('tbr')), - 'vbr': int(m.group('vbr')), - 'abr': int(m.group('abr')), - 'ext': m.group('ext'), - 'width': int(m.group('width')), - 'height': int(m.group('height')), - }) - return formats - def _initialize_api(self, video_id): req = sanitized_Request( 'http://www.vevo.com/auth', data=b'') @@ -214,148 +164,91 @@ class VevoIE(VevoBaseIE): self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token'] def _call_api(self, path, *args, **kwargs): - return self._download_json(self._api_url_template % path, *args, **kwargs) + try: + data = self._download_json(self._api_url_template % path, *args, **kwargs) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + errors = self._parse_json(e.cause.read().decode(), None)['errors'] + error_message = ', '.join([error['message'] for error in errors]) + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) + raise + return data def _real_extract(self, url): video_id = self._match_id(url) - json_url = 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id - response = self._download_json( - json_url, video_id, 'Downloading video info', - 'Unable to download info', fatal=False) or {} - video_info = response.get('video') or {} + self._initialize_api(video_id) + + video_info = self._call_api( + 'video/%s' % video_id, video_id, 'Downloading api video info', + 'Failed to download video info') + + video_versions = self._call_api( + 'video/%s/streams' % video_id, video_id, + 'Downloading video versions info', + 'Failed to download video versions info', + fatal=False) + + # Some videos are only available via webpage (e.g. + # https://github.com/rg3/youtube-dl/issues/9366) + if not video_versions: + webpage = self._download_webpage(url, video_id) + video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0] + + uploader = None artist = None featured_artist = None - uploader = None - view_count = None + artists = video_info.get('artists') + for curr_artist in artists: + if curr_artist.get('role') == 'Featured': + featured_artist = curr_artist['name'] + else: + artist = uploader = curr_artist['name'] + formats = [] + for video_version in video_versions: + version = self._VERSIONS.get(video_version['version']) + version_url = video_version.get('url') + if not version_url: + continue - if not video_info: - try: - self._initialize_api(video_id) - except ExtractorError: - ytid = response.get('errorInfo', {}).get('ytid') - if ytid: - self.report_warning( - 'Video is geoblocked, trying with the YouTube video %s' % ytid) - return self.url_result(ytid, 'Youtube', ytid) - - raise - - video_info = self._call_api( - 'video/%s' % video_id, video_id, 'Downloading api video info', - 'Failed to download video info') - - video_versions = self._call_api( - 'video/%s/streams' % video_id, video_id, - 'Downloading video versions info', - 'Failed to download video versions info', - fatal=False) - - # Some videos are only available via webpage (e.g. 
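
The reworked _call_api above surfaces apiv2 error payloads instead of letting the raw HTTP error bubble up. A standalone sketch of that error-collection step; the JSON body is a made-up example of the {"errors": [...]} shape the handler expects.

    import json
    from youtube_dl.utils import ExtractorError

    def raise_vevo_errors(raw_body, ie_name='vevo'):
        # join every error message from the apiv2 payload into one ExtractorError
        errors = json.loads(raw_body)['errors']
        message = ', '.join(error['message'] for error in errors)
        raise ExtractorError('%s said: %s' % (ie_name, message), expected=True)

    # raise_vevo_errors('{"errors": [{"message": "Video not available in your region"}]}')
    # -> ExtractorError: vevo said: Video not available in your region
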
- # https://github.com/rg3/youtube-dl/issues/9366) - if not video_versions: - webpage = self._download_webpage(url, video_id) - video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0] - - timestamp = parse_iso8601(video_info.get('releaseDate')) - artists = video_info.get('artists') - for curr_artist in artists: - if curr_artist.get('role') == 'Featured': - featured_artist = curr_artist['name'] - else: - artist = uploader = curr_artist['name'] - view_count = int_or_none(video_info.get('views', {}).get('total')) - - for video_version in video_versions: - version = self._VERSIONS.get(video_version['version']) - version_url = video_version.get('url') - if not version_url: + if '.ism' in version_url: + continue + elif '.mpd' in version_url: + formats.extend(self._extract_mpd_formats( + version_url, video_id, mpd_id='dash-%s' % version, + note='Downloading %s MPD information' % version, + errnote='Failed to download %s MPD information' % version, + fatal=False)) + elif '.m3u8' in version_url: + formats.extend(self._extract_m3u8_formats( + version_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls-%s' % version, + note='Downloading %s m3u8 information' % version, + errnote='Failed to download %s m3u8 information' % version, + fatal=False)) + else: + m = re.search(r'''(?xi) + _(?P<width>[0-9]+)x(?P<height>[0-9]+) + _(?P<vcodec>[a-z0-9]+) + _(?P<vbr>[0-9]+) + _(?P<acodec>[a-z0-9]+) + _(?P<abr>[0-9]+) + \.(?P<ext>[a-z0-9]+)''', version_url) + if not m: continue - if '.ism' in version_url: - continue - elif '.mpd' in version_url: - formats.extend(self._extract_mpd_formats( - version_url, video_id, mpd_id='dash-%s' % version, - note='Downloading %s MPD information' % version, - errnote='Failed to download %s MPD information' % version, - fatal=False)) - elif '.m3u8' in version_url: - formats.extend(self._extract_m3u8_formats( - version_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls-%s' % version, - note='Downloading %s m3u8 information' % version, - errnote='Failed to download %s m3u8 information' % version, - fatal=False)) - else: - m = re.search(r'''(?xi) - _(?P<width>[0-9]+)x(?P<height>[0-9]+) - _(?P<vcodec>[a-z0-9]+) - _(?P<vbr>[0-9]+) - _(?P<acodec>[a-z0-9]+) - _(?P<abr>[0-9]+) - \.(?P<ext>[a-z0-9]+)''', version_url) - if not m: - continue - - formats.append({ - 'url': version_url, - 'format_id': 'http-%s-%s' % (version, video_version['quality']), - 'vcodec': m.group('vcodec'), - 'acodec': m.group('acodec'), - 'vbr': int(m.group('vbr')), - 'abr': int(m.group('abr')), - 'ext': m.group('ext'), - 'width': int(m.group('width')), - 'height': int(m.group('height')), - }) - else: - timestamp = int_or_none(self._search_regex( - r'/Date\((\d+)\)/', - video_info['releaseDate'], 'release date', fatal=False), - scale=1000) - artists = video_info.get('mainArtists') - if artists: - artist = uploader = artists[0]['artistName'] - - featured_artists = video_info.get('featuredArtists') - if featured_artists: - featured_artist = featured_artists[0]['artistName'] - - smil_parsed = False - for video_version in video_info['videoVersions']: - version = self._VERSIONS.get(video_version['version']) - if version == 'youtube': - continue - else: - source_type = self._SOURCE_TYPES.get(video_version['sourceType']) - renditions = compat_etree_fromstring(video_version['data']) - if source_type == 'http': - for rend in renditions.findall('rendition'): - attr = rend.attrib - formats.append({ - 'url': attr['url'], - 'format_id': 'http-%s-%s' % (version, attr['name']), - 'height': 
int_or_none(attr.get('frameheight')), - 'width': int_or_none(attr.get('frameWidth')), - 'tbr': int_or_none(attr.get('totalBitrate')), - 'vbr': int_or_none(attr.get('videoBitrate')), - 'abr': int_or_none(attr.get('audioBitrate')), - 'vcodec': attr.get('videoCodec'), - 'acodec': attr.get('audioCodec'), - }) - elif source_type == 'hls': - formats.extend(self._extract_m3u8_formats( - renditions.find('rendition').attrib['url'], video_id, - 'mp4', 'm3u8_native', m3u8_id='hls-%s' % version, - note='Downloading %s m3u8 information' % version, - errnote='Failed to download %s m3u8 information' % version, - fatal=False)) - elif source_type == 'smil' and version == 'level3' and not smil_parsed: - formats.extend(self._extract_smil_formats( - renditions.find('rendition').attrib['url'], video_id, False)) - smil_parsed = True + formats.append({ + 'url': version_url, + 'format_id': 'http-%s-%s' % (version, video_version['quality']), + 'vcodec': m.group('vcodec'), + 'acodec': m.group('acodec'), + 'vbr': int(m.group('vbr')), + 'abr': int(m.group('abr')), + 'ext': m.group('ext'), + 'width': int(m.group('width')), + 'height': int(m.group('height')), + }) self._sort_formats(formats) track = video_info['title'] @@ -376,17 +269,15 @@ class VevoIE(VevoBaseIE): else: age_limit = None - duration = video_info.get('duration') - return { 'id': video_id, 'title': title, 'formats': formats, 'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'), - 'timestamp': timestamp, + 'timestamp': parse_iso8601(video_info.get('releaseDate')), 'uploader': uploader, - 'duration': duration, - 'view_count': view_count, + 'duration': int_or_none(video_info.get('duration')), + 'view_count': int_or_none(video_info.get('views', {}).get('total')), 'age_limit': age_limit, 'track': track, 'artist': uploader, From b3277115a192b88df34692e42f62f39bd4a65bac Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 26 Jan 2017 16:14:42 +0100 Subject: [PATCH 0086/1696] [disney] Add new extractor(closes #7409)(closes #11801)(#4975)(#11000) --- youtube_dl/extractor/disney.py | 115 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 116 insertions(+) create mode 100644 youtube_dl/extractor/disney.py diff --git a/youtube_dl/extractor/disney.py b/youtube_dl/extractor/disney.py new file mode 100644 index 000000000..396873c6d --- /dev/null +++ b/youtube_dl/extractor/disney.py @@ -0,0 +1,115 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + unified_strdate, + compat_str, + determine_ext, +) + + +class DisneyIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|starwars\.com))/(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})''' + _TESTS = [{ + 'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977', + 'info_dict': { + 'id': '545ed1857afee5a0ec239977', + 'ext': 'mp4', + 'title': 'Moana - Trailer', + 'description': 'A fun adventure for the entire Family! 
Bring home Moana on Digital HD Feb 21 & Blu-ray March 7', + 'upload_date': '20170112', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2', + 'only_matching': True, + }, { + 'url': 'http://video.en.disneyme.com/watch/future-worm/robo-carp-2001-544b66002aa7353cdd3f5114', + 'only_matching': True, + }, { + 'url': 'http://video.disneyturkiye.com.tr/izle/7c-7-cuceler/kimin-sesi-zaten-5456f3d015f6b36c8afdd0e2', + 'only_matching': True, + }, { + 'url': 'http://disneyjunior.disney.com/embed/546a4798ddba3d1612e4005d', + 'only_matching': True, + }, { + 'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097', + 'only_matching': True, + }] + + def _real_extract(self, url): + domain, video_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage( + 'http://%s/embed/%s' % (domain, video_id), video_id) + video_data = self._parse_json(self._search_regex( + r'Disney\.EmbedVideo=({.+});', webpage, 'embed data'), video_id)['video'] + + for external in video_data.get('externals', []): + if external.get('source') == 'vevo': + return self.url_result('vevo:' + external['data_id'], 'Vevo') + + title = video_data['title'] + + formats = [] + for flavor in video_data.get('flavors', []): + flavor_format = flavor.get('format') + flavor_url = flavor.get('url') + if not flavor_url or not re.match(r'https?://', flavor_url): + continue + tbr = int_or_none(flavor.get('bitrate')) + if tbr == 99999: + formats.extend(self._extract_m3u8_formats( + flavor_url, video_id, 'mp4', m3u8_id=flavor_format, fatal=False)) + continue + format_id = [] + if flavor_format: + format_id.append(flavor_format) + if tbr: + format_id.append(compat_str(tbr)) + ext = determine_ext(flavor_url) + if flavor_format == 'applehttp' or ext == 'm3u8': + ext = 'mp4' + width = int_or_none(flavor.get('width')) + height = int_or_none(flavor.get('height')) + formats.append({ + 'format_id': '-'.join(format_id), + 'url': flavor_url, + 'width': width, + 'height': height, + 'tbr': tbr, + 'ext': ext, + 'vcodec': 'none' if (width == 0 and height == 0) else None, + }) + self._sort_formats(formats) + + subtitles = {} + for caption in video_data.get('captions', []): + caption_url = caption.get('url') + caption_format = caption.get('format') + if not caption_url or caption_format.startswith('unknown'): + continue + subtitles.setdefault(caption.get('language', 'en'), []).append({ + 'url': caption_url, + 'ext': { + 'webvtt': 'vtt', + }.get(caption_format, caption_format), + }) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description') or video_data.get('short_desc'), + 'thumbnail': video_data.get('thumb') or video_data.get('thumb_secure'), + 'duration': int_or_none(video_data.get('duration_sec')), + 'upload_date': unified_strdate(video_data.get('publish_date')), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f09b4cf2c..0c3e081ad 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -251,6 +251,7 @@ from .dumpert import DumpertIE from .defense import DefenseGouvFrIE from .discovery import DiscoveryIE from .discoverygo import DiscoveryGoIE +from .disney import DisneyIE from .dispeak import DigitallySpeakingIE from .dropbox import DropboxIE from .dw import ( From c19ef77c3138ecf1ce5c988de2d94031f58b4f69 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Stefan=20P=C3=B6schel?= <github@basicmaster.de> Date: Wed, 25 Jan 2017 20:44:03 +0100 Subject: [PATCH 0087/1696] [jamendo] Extract full title --- youtube_dl/extractor/jamendo.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py index 51d19e67d..3db07e79f 100644 --- a/youtube_dl/extractor/jamendo.py +++ b/youtube_dl/extractor/jamendo.py @@ -16,7 +16,7 @@ class JamendoIE(InfoExtractor): 'id': '196219', 'display_id': 'stories-from-emona-i', 'ext': 'flac', - 'title': 'Stories from Emona I', + 'title': 'Maya Filipič - Stories from Emona I', 'thumbnail': r're:^https?://.*\.jpg' } } @@ -28,7 +28,7 @@ class JamendoIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - title = self._html_search_meta('name', webpage, 'title') + title = self._search_regex(r'<title>(.*?)\ \|\ Jamendo\ Music\ .*', webpage, 'title') formats = [{ 'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294' @@ -62,21 +62,21 @@ class JamendoAlbumIE(InfoExtractor): 'url': 'https://www.jamendo.com/album/121486/duck-on-cover', 'info_dict': { 'id': '121486', - 'title': 'Duck On Cover' + 'title': 'Shearer - Duck On Cover' }, 'playlist': [{ 'md5': 'e1a2fcb42bda30dfac990212924149a8', 'info_dict': { 'id': '1032333', 'ext': 'flac', - 'title': 'Warmachine' + 'title': 'Shearer - Warmachine' } }, { 'md5': '1f358d7b2f98edfe90fd55dac0799d50', 'info_dict': { 'id': '1032330', 'ext': 'flac', - 'title': 'Without Your Ghost' + 'title': 'Shearer - Without Your Ghost' } }], 'params': { @@ -90,7 +90,7 @@ class JamendoAlbumIE(InfoExtractor): webpage = self._download_webpage(url, mobj.group('display_id')) - title = self._html_search_meta('name', webpage, 'title') + title = self._search_regex(r'(.*?)\ \|\ Jamendo\ Music\ .*', webpage, 'title') entries = [ self.url_result( From 15846398ca0af9154b88a69f594557568c6a4782 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 26 Jan 2017 23:23:08 +0700 Subject: [PATCH 0088/1696] [utils] Improve parse_duration --- test/test_utils.py | 1 + youtube_dl/utils.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index e99bf794e..a74d59f34 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -510,6 +510,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_duration('1 hour 3 minutes'), 3780) self.assertEqual(parse_duration('87 Min.'), 5220) self.assertEqual(parse_duration('PT1H0.040S'), 3600.04) + self.assertEqual(parse_duration('PT00H03M30SZ'), 210) def test_fix_xml_ampersands(self): self.assertEqual( diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 98acc2b45..cf46711b9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1773,7 +1773,7 @@ def parse_duration(s): s = s.strip() days, hours, mins, secs, ms = [None] * 5 - m = re.match(r'(?:(?:(?:(?P[0-9]+):)?(?P[0-9]+):)?(?P[0-9]+):)?(?P[0-9]+)(?P\.[0-9]+)?$', s) + m = re.match(r'(?:(?:(?:(?P[0-9]+):)?(?P[0-9]+):)?(?P[0-9]+):)?(?P[0-9]+)(?P\.[0-9]+)?Z?$', s) if m: days, hours, mins, secs, ms = m.groups() else: @@ -1790,11 +1790,11 @@ def parse_duration(s): )? 
(?: (?P[0-9]+)(?P\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s* - )?$''', s) + )?Z?$''', s) if m: days, hours, mins, secs, ms = m.groups() else: - m = re.match(r'(?i)(?:(?P[0-9.]+)\s*(?:hours?)|(?P[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)$', s) + m = re.match(r'(?i)(?:(?P[0-9.]+)\s*(?:hours?)|(?P[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s) if m: hours, mins = m.groups() else: From 3cbecdd11121b9c7ff0284e481992f7230806399 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 26 Jan 2017 23:25:40 +0700 Subject: [PATCH 0089/1696] [jamendo] Improve and extract more metadata (closes #11836) --- youtube_dl/extractor/jamendo.py | 71 +++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py index 3db07e79f..595d7a5b7 100644 --- a/youtube_dl/extractor/jamendo.py +++ b/youtube_dl/extractor/jamendo.py @@ -5,9 +5,27 @@ import re from ..compat import compat_urlparse from .common import InfoExtractor - - -class JamendoIE(InfoExtractor): +from ..utils import parse_duration + + +class JamendoBaseIE(InfoExtractor): + def _extract_meta(self, webpage, fatal=True): + title = self._og_search_title( + webpage, default=None) or self._search_regex( + r'([^<]+)', webpage, + 'title', default=None) + if title: + title = self._search_regex( + r'(.+?)\s*\|\s*Jamendo Music', title, 'title', default=None) + if not title: + title = self._html_search_meta( + 'name', webpage, 'title', fatal=fatal) + mobj = re.search(r'(.+) - (.+)', title or '') + artist, second = mobj.groups() if mobj else [None] * 2 + return title, artist, second + + +class JamendoIE(JamendoBaseIE): _VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)' _TEST = { 'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i', @@ -17,6 +35,9 @@ class JamendoIE(InfoExtractor): 'display_id': 'stories-from-emona-i', 'ext': 'flac', 'title': 'Maya Filipič - Stories from Emona I', + 'artist': 'Maya Filipič', + 'track': 'Stories from Emona I', + 'duration': 210, 'thumbnail': r're:^https?://.*\.jpg' } } @@ -28,7 +49,7 @@ class JamendoIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - title = self._search_regex(r'<title>(.*?)\ \|\ Jamendo\ Music\ .*', webpage, 'title') + title, artist, track = self._extract_meta(webpage) formats = [{ 'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294' @@ -46,17 +67,23 @@ class JamendoIE(InfoExtractor): thumbnail = self._html_search_meta( 'image', webpage, 'thumbnail', fatal=False) + duration = parse_duration(self._search_regex( + r']+itemprop=["\']duration["\'][^>]+content=["\'](.+?)["\']', + webpage, 'duration', fatal=False)) return { 'id': track_id, 'display_id': display_id, 'thumbnail': thumbnail, 'title': title, + 'duration': duration, + 'artist': artist, + 'track': track, 'formats': formats } -class JamendoAlbumIE(InfoExtractor): +class JamendoAlbumIE(JamendoBaseIE): _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P[0-9]+)/(?P[\w-]+)' _TEST = { 'url': 'https://www.jamendo.com/album/121486/duck-on-cover', @@ -69,14 +96,18 @@ class JamendoAlbumIE(InfoExtractor): 'info_dict': { 'id': '1032333', 'ext': 'flac', - 'title': 'Shearer - Warmachine' + 'title': 'Shearer - Warmachine', + 'artist': 'Shearer', + 'track': 'Warmachine', } }, { 'md5': '1f358d7b2f98edfe90fd55dac0799d50', 'info_dict': { 'id': '1032330', 'ext': 'flac', - 'title': 'Shearer - Without Your Ghost' + 'title': 'Shearer - Without Your Ghost', + 'artist': 'Shearer', + 'track': 
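
The parse_duration change above tolerates a trailing 'Z' on otherwise ISO-8601-style durations, and the same helper backs the new Jamendo duration lookup. The values below come straight from the updated test_utils assertions:

    from youtube_dl.utils import parse_duration

    assert parse_duration('PT00H03M30SZ') == 210      # new: trailing Z is ignored
    assert parse_duration('PT1H0.040S') == 3600.04
    assert parse_duration('1 hour 3 minutes') == 3780
    assert parse_duration('87 Min.') == 5220
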
'Without Your Ghost', } }], 'params': { @@ -90,18 +121,18 @@ class JamendoAlbumIE(InfoExtractor): webpage = self._download_webpage(url, mobj.group('display_id')) - title = self._search_regex(r'(.*?)\ \|\ Jamendo\ Music\ .*', webpage, 'title') - - entries = [ - self.url_result( - compat_urlparse.urljoin(url, m.group('path')), - ie=JamendoIE.ie_key(), - video_id=self._search_regex( - r'/track/(\d+)', m.group('path'), - 'track id', default=None)) - for m in re.finditer( - r']+href=(["\'])(?P(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link', - webpage) - ] + title, artist, album = self._extract_meta(webpage, fatal=False) + + entries = [{ + '_type': 'url_transparent', + 'url': compat_urlparse.urljoin(url, m.group('path')), + 'ie_key': JamendoIE.ie_key(), + 'id': self._search_regex( + r'/track/(\d+)', m.group('path'), 'track id', default=None), + 'artist': artist, + 'album': album, + } for m in re.finditer( + r']+href=(["\'])(?P(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link', + webpage)] return self.playlist_result(entries, album_id, title) From 9463637887ba784e3499410ab0945dcd68002bc1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 26 Jan 2017 18:36:28 +0100 Subject: [PATCH 0090/1696] [tva] Add new extractor(closes #11842) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tva.py | 54 ++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 youtube_dl/extractor/tva.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0c3e081ad..81366f933 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -985,6 +985,7 @@ from .tv2 import ( ) from .tv3 import TV3IE from .tv4 import TV4IE +from .tva import TVAIE from .tvanouvelles import ( TVANouvellesIE, TVANouvellesArticleIE, diff --git a/youtube_dl/extractor/tva.py b/youtube_dl/extractor/tva.py new file mode 100644 index 000000000..3ced098f9 --- /dev/null +++ b/youtube_dl/extractor/tva.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_iso8601, + smuggle_url, +) + + +class TVAIE(InfoExtractor): + _VALID_URL = r'https?://videos\.tva\.ca/episode/(?P\d+)' + _TEST = { + 'url': 'http://videos.tva.ca/episode/85538', + 'info_dict': { + 'id': '85538', + 'ext': 'mp4', + 'title': 'Épisode du 25 janvier 2017', + 'description': 'md5:e9e7fb5532ab37984d2dc87229cadf98', + 'upload_date': '20170126', + 'timestamp': 1485442329, + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + "https://d18jmrhziuoi7p.cloudfront.net/isl/api/v1/dataservice/Items('%s')" % video_id, + video_id, query={ + '$expand': 'Metadata,CustomId', + '$select': 'Metadata,Id,Title,ShortDescription,LongDescription,CreatedDate,CustomId,AverageUserRating,Categories,ShowName', + '$format': 'json', + }) + metadata = video_data.get('Metadata', {}) + + return { + '_type': 'url_transparent', + 'id': video_id, + 'title': video_data['Title'], + 'url': smuggle_url('ooyala:' + video_data['CustomId'], {'supportedformats': 'm3u8,hds'}), + 'description': video_data.get('LongDescription') or video_data.get('ShortDescription'), + 'series': video_data.get('ShowName'), + 'episode': metadata.get('EpisodeTitle'), + 'episode_number': int_or_none(metadata.get('EpisodeNumber')), + 'categories': video_data.get('Categories'), + 'average_rating': 
video_data.get('AverageUserRating'), + 'timestamp': parse_iso8601(video_data.get('CreatedDate')), + 'ie_key': 'Ooyala', + } From b51a4ebed45a3944c02bb3c36778630fd9306de7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 26 Jan 2017 19:15:43 +0100 Subject: [PATCH 0091/1696] [aenetworks] fix season episodes extraction(fixes #11669) --- youtube_dl/extractor/aenetworks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index c5e079a40..c97317400 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -87,7 +87,7 @@ class AENetworksIE(AENetworksBaseIE): self._html_search_meta('aetn:SeriesTitle', webpage)) elif url_parts_len == 2: entries = [] - for episode_item in re.findall(r'(?s)]+class="[^"]*episode-item[^"]*"[^>]*>', webpage): + for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage): episode_attributes = extract_attributes(episode_item) episode_url = compat_urlparse.urljoin( url, episode_attributes['data-canonical']) From 0b23c222ba099d73c287d024f45f90714c15f289 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Jan 2017 21:31:26 +0700 Subject: [PATCH 0092/1696] [twitch:vod] Expand _VALID_URL (closes #11846) --- youtube_dl/extractor/twitch.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 6d67bda86..1ca159a4d 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -209,7 +209,7 @@ class TwitchVodIE(TwitchItemBaseIE): _VALID_URL = r'''(?x) https?:// (?: - (?:www\.)?twitch\.tv/[^/]+/v/| + (?:www\.)?twitch\.tv/(?:[^/]+/v|videos)/| player\.twitch\.tv/\?.*?\bvideo=v ) (?P\d+) @@ -259,6 +259,9 @@ class TwitchVodIE(TwitchItemBaseIE): }, { 'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877', 'only_matching': True, + }, { + 'url': 'https://www.twitch.tv/videos/6528877', + 'only_matching': True, }] def _real_extract(self, url): From 489ffc118232056537e86bd0281488e217fce7d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Jan 2017 22:55:42 +0700 Subject: [PATCH 0093/1696] [soundcloud] Fix track URL extraction (closes #11852) --- youtube_dl/extractor/soundcloud.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 5a201eaa8..96bebeec5 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -173,11 +173,12 @@ class SoundcloudIE(InfoExtractor): }) # We have to retrieve the url - streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?' 
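
The widened Twitch VOD pattern above accepts the newer /videos/<id> URLs alongside the older /<channel>/v/<id> and player URLs. A quick standalone check; the riotgames URL is an illustrative older-style link rather than one quoted from the hunk.

    import re

    TWITCH_VOD_RE = r'''(?x)
                        https?://
                            (?:
                                (?:www\.)?twitch\.tv/(?:[^/]+/v|videos)/|
                                player\.twitch\.tv/\?.*?\bvideo=v
                            )
                            (?P<id>\d+)
                        '''

    for url in ('https://www.twitch.tv/videos/6528877',
                'http://www.twitch.tv/riotgames/v/6528877',
                'http://player.twitch.tv/?t=5m10s&video=v6528877'):
        print(re.match(TWITCH_VOD_RE, url).group('id'))  # 6528877 every time
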
- 'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token)) format_dict = self._download_json( - streams_url, - track_id, 'Downloading track url') + 'http://api.soundcloud.com/i1/tracks/%s/streams' % track_id, + track_id, 'Downloading track url', query={ + 'client_id': self._CLIENT_ID, + 'secret_token': secret_token, + }) for key, stream_url in format_dict.items(): if key.startswith('http'): From 9b73471801d24cec678226c82cce9e9ece92732e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Jan 2017 23:08:32 +0700 Subject: [PATCH 0094/1696] [soundcloud] Extract hls formats --- youtube_dl/extractor/soundcloud.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 96bebeec5..55c80e1cc 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -198,6 +198,13 @@ class SoundcloudIE(InfoExtractor): 'ext': 'flv', 'vcodec': 'none', }) + elif key.startswith('hls'): + m3u8_formats = self._extract_m3u8_formats( + stream_url, track_id, 'mp3', entry_protocol='m3u8_native', + m3u8_id=key, fatal=False) + for f in m3u8_formats: + f['vcodec'] = 'none' + formats.extend(m3u8_formats) if not formats: # We fallback to the stream_url in the original info, this From 3a194cb4ecfa8c2590f22236dffc84e1b1565196 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Jan 2017 23:16:30 +0700 Subject: [PATCH 0095/1696] [soundcloud] Improve formats extraction and extract audio bitrate --- youtube_dl/extractor/soundcloud.py | 52 +++++++++++++++--------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 55c80e1cc..b3aa4ce26 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -181,46 +181,46 @@ class SoundcloudIE(InfoExtractor): }) for key, stream_url in format_dict.items(): + abr = int_or_none(self._search_regex( + r'_(\d+)_url', key, 'audio bitrate', default=None)) if key.startswith('http'): - formats.append({ + stream_formats = [{ 'format_id': key, 'ext': ext, 'url': stream_url, - 'vcodec': 'none', - }) + }] elif key.startswith('rtmp'): # The url doesn't have an rtmp app, we have to extract the playpath url, path = stream_url.split('mp3:', 1) - formats.append({ + stream_formats = [{ 'format_id': key, 'url': url, 'play_path': 'mp3:' + path, 'ext': 'flv', - 'vcodec': 'none', - }) + }] elif key.startswith('hls'): - m3u8_formats = self._extract_m3u8_formats( + stream_formats = self._extract_m3u8_formats( stream_url, track_id, 'mp3', entry_protocol='m3u8_native', m3u8_id=key, fatal=False) - for f in m3u8_formats: - f['vcodec'] = 'none' - formats.extend(m3u8_formats) - - if not formats: - # We fallback to the stream_url in the original info, this - # cannot be always used, sometimes it can give an HTTP 404 error - formats.append({ - 'format_id': 'fallback', - 'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID, - 'ext': ext, - 'vcodec': 'none', - }) - - for f in formats: - if f['format_id'].startswith('http'): - f['protocol'] = 'http' - if f['format_id'].startswith('rtmp'): - f['protocol'] = 'rtmp' + else: + continue + + for f in stream_formats: + f['abr'] = abr + + formats.extend(stream_formats) + + if not formats: + # We fallback to the stream_url in the original info, this + # cannot be always used, sometimes it can give an HTTP 404 error + formats.append({ + 'format_id': 'fallback', + 'url': 
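
The Soundcloud format loop above now derives the audio bitrate from the stream key itself. The keys below are representative of what the /i1/tracks/<id>/streams endpoint returns and are assumed for illustration, not taken from the patch.

    import re

    format_dict = {
        'http_mp3_128_url': 'https://example.invalid/a.mp3',
        'hls_mp3_128_url': 'https://example.invalid/a.m3u8',
        'preview_mp3_128_url': 'https://example.invalid/preview.mp3',
    }
    for key in format_dict:
        m = re.search(r'_(\d+)_url', key)
        abr = int(m.group(1)) if m else None
        print(key, abr)  # each of these keys yields 128
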
info['stream_url'] + '?client_id=' + self._CLIENT_ID, + 'ext': ext, + }) + + for f in formats: + f['vcodec'] = 'none' self._check_formats(formats, track_id) self._sort_formats(formats) From e0b6e50ccd124c6f618bf25bc94361d83cbc8b86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Jan 2017 23:55:55 +0700 Subject: [PATCH 0096/1696] [crunchyroll] Improve series and season metadata extraction (closes #11832) --- youtube_dl/extractor/crunchyroll.py | 38 ++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 559044352..f811c7f33 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -166,6 +166,25 @@ class CrunchyrollIE(CrunchyrollBaseIE): # m3u8 download 'skip_download': True, }, + }, { + 'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589', + 'info_dict': { + 'id': '727589', + 'ext': 'mp4', + 'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance from this Judicial Injustice!", + 'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Kadokawa Pictures Inc.', + 'upload_date': '20170118', + 'series': "KONOSUBA -God's blessing on this wonderful world!", + 'season_number': 2, + 'episode': 'Give Me Deliverance from this Judicial Injustice!', + 'episode_number': 1, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697', 'only_matching': True, @@ -439,6 +458,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text subtitles = self.extract_subtitles(video_id, webpage) + # webpage provide more accurate data than series_title from XML + series = self._html_search_regex( + r'id=["\']showmedia_about_episode_num[^>]+>\s*]+>([^<]+)', + webpage, 'series', default=xpath_text(metadata, 'series_title')) + + episode = xpath_text(metadata, 'episode_title') + episode_number = int_or_none(xpath_text(metadata, 'episode_number')) + + season_number = int_or_none(self._search_regex( + r'(?s)]+id=["\']showmedia_about_episode_num[^>]+>.+?\s*
\s*Season (\d+)', + webpage, 'season number', default=None)) + return { 'id': video_id, 'title': video_title, @@ -446,9 +477,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'thumbnail': xpath_text(metadata, 'episode_image_url'), 'uploader': video_uploader, 'upload_date': video_upload_date, - 'series': xpath_text(metadata, 'series_title'), - 'episode': xpath_text(metadata, 'episode_title'), - 'episode_number': int_or_none(xpath_text(metadata, 'episode_number')), + 'series': series, + 'season_number': season_number, + 'episode': episode, + 'episode_number': episode_number, 'subtitles': subtitles, 'formats': formats, } From 815d2a36d81c4cc6181d0536ce811b0e2e4a5021 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 00:03:21 +0700 Subject: [PATCH 0097/1696] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index ff305d7e8..2c670c62e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +version + +Core +* [utils] Improve parse_duration + +Extractors +* [crunchyroll] Improve series and season metadata extraction (#11832) +* [soundcloud] Improve formats extraction and extract audio bitrate ++ [soundcloud] Extract HLS formats +* [soundcloud] Fix track URL extraction (#11852) ++ [twitch:vod] Expand URL regular expressions (#11846) +* [aenetworks] Fix season episodes extraction (#11669) ++ [tva] Add support for videos.tva.ca (#11842) +* [jamendo] Improve and extract more metadata (#11836) ++ [disney] Add support for Disney sites (#7409, #11801, #4975, #11000) +* [vevo] Remove request to old API and catch API v2 errors ++ [cmt,mtv,southpark] Add support for episode URLs (#11837) ++ [youtube] Add fallback for duration extraction (#11841) + + version 2017.01.25 Extractors From d41ed6d243c2079db123963a7f65e91f24b390f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 00:33:55 +0700 Subject: [PATCH 0098/1696] release 2017.01.28 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 4d409f785..693f3b745 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.25*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.25** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
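
A standalone check of the season-number lookup added for Crunchyroll above. The markup is a made-up fragment, and the pattern assumes a closing </h4> sits between the episode-number heading and the season heading.

    import re

    webpage = '''
    <h4 id="showmedia_about_episode_num">Episode Number</h4>
    <h4>
        Season 2, Episode 1
    </h4>
    '''

    season_number = re.search(
        r'(?s)<h4[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h4>\s*<h4>\s*Season (\d+)',
        webpage).group(1)
    print(season_number)  # 2
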
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.28** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.25 +[debug] youtube-dl version 2017.01.28 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 2c670c62e..8e5a04b42 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.01.28 Core * [utils] Improve parse_duration diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f640cfcaa..6318a862f 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -202,6 +202,7 @@ - **Digiteka** - **Discovery** - **DiscoveryGo** + - **Disney** - **Dotsub** - **DouyuTV**: 斗鱼 - **DPlay** @@ -785,6 +786,7 @@ - **TV2Article** - **TV3** - **TV4**: tv4.se and tv4play.se + - **TVA** - **TVANouvelles** - **TVANouvellesArticle** - **TVC** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c23fe85de..c22c410a8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.25' +__version__ = '2017.01.28' From 99a0baf370c7652f6103cff71f878872229b4129 Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Tue, 24 Jan 2017 17:42:00 +0100 Subject: [PATCH 0099/1696] [konserthusetplay] Add support for rspoplay.se --- youtube_dl/extractor/konserthusetplay.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/konserthusetplay.py b/youtube_dl/extractor/konserthusetplay.py index 3ae2aa317..c11cbcf47 100644 --- a/youtube_dl/extractor/konserthusetplay.py +++ b/youtube_dl/extractor/konserthusetplay.py @@ -11,22 +11,22 @@ from ..utils import ( class KonserthusetPlayIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?konserthusetplay\.se/\?.*\bm=(?P[^&]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?(?:konserthusetplay|rspoplay)\.se/\?.*\bm=(?P[^&]+)' + _TESTS = [{ 'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A', + 'md5': 'e3fd47bf44e864bd23c08e487abe1967', 'info_dict': { 'id': 'CKDDnlCY-dhWAAqiMERd-A', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Orkesterns instrument: Valthornen', 'description': 'md5:f10e1f0030202020396a4d712d2fa827', 'thumbnail': 're:^https?://.*$', - 'duration': 398.8, + 'duration': 398.76, }, - 'params': { - # rtmp download - 'skip_download': True, - }, - } + }, { + 'url': 'http://rspoplay.se/?m=elWuEH34SMKvaO4wO_cHBw', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From 26e40542dd730b1a18f9d7eebe241972b77810cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 17:50:56 +0700 Subject: [PATCH 0100/1696] [kaltura] Improve uploader_id extraction --- youtube_dl/extractor/kaltura.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index c0ddad6f9..a57d913af 100644 --- a/youtube_dl/extractor/kaltura.py 
+++ b/youtube_dl/extractor/kaltura.py @@ -319,6 +319,6 @@ class KalturaIE(InfoExtractor): 'thumbnail': info.get('thumbnailUrl'), 'duration': info.get('duration'), 'timestamp': info.get('createdAt'), - 'uploader_id': info.get('userId'), + 'uploader_id': info.get('userId') if info.get('userId') != 'None' else None, 'view_count': info.get('plays'), } From ab6f6aee78fc4757fcb65bd8f4699aaf9feac3a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 18:27:42 +0700 Subject: [PATCH 0101/1696] [kaltura] Add fallback for fileExt --- youtube_dl/extractor/kaltura.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index a57d913af..5ef382f9f 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -266,9 +266,12 @@ class KalturaIE(InfoExtractor): # skip for now. if f.get('fileExt') == 'chun': continue - if not f.get('fileExt') and f.get('containerFormat') == 'qt': + if not f.get('fileExt'): # QT indicates QuickTime; some videos have broken fileExt - f['fileExt'] = 'mov' + if f.get('containerFormat') == 'qt': + f['fileExt'] = 'mov' + else: + f['fileExt'] = 'mp4' video_url = sign_url( '%s/flavorId/%s' % (data_url, f['id'])) # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g From b92d3c5343536eb0a865afa79e3787fc384ec0ec Mon Sep 17 00:00:00 2001 From: ping Date: Tue, 24 Jan 2017 13:52:17 +0800 Subject: [PATCH 0102/1696] [vlive] Add support for channels --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/vlive.py | 68 ++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 81366f933..c781c9b87 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1129,7 +1129,10 @@ from .vk import ( VKUserVideosIE, VKWallPostIE, ) -from .vlive import VLiveIE +from .vlive import ( + VLiveIE, + VLiveChannelIE +) from .vodlocker import VodlockerIE from .vodplatform import VODPlatformIE from .voicerepublic import VoiceRepublicIE diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 540246c79..70bab1f04 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -2,6 +2,8 @@ from __future__ import unicode_literals import re +import time +import itertools from .common import InfoExtractor from ..utils import ( @@ -169,3 +171,69 @@ class VLiveIE(InfoExtractor): 'subtitles': subtitles, }) return info + + +class VLiveChannelIE(InfoExtractor): + IE_NAME = 'vlive:channel' + _VALID_URL = r'https?://channels\.vlive\.tv/(?P[0-9A-Z]+)/video' + _TEST = { + 'url': 'http://channels.vlive.tv/FCD4B/video', + 'info_dict': { + 'id': 'FCD4B', + 'title': 'MAMAMOO', + }, + 'playlist_mincount': 110 + } + _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b' + + def _real_extract(self, url): + channel_code = self._match_id(url) + + webpage = self._download_webpage( + 'http://channels.vlive.tv/%s/video' % channel_code, channel_code) + app_js_url = self._search_regex( + r'(http[^\'\"\s]+app\.js)', webpage, 'app js', default='') + + if app_js_url: + app_js = self._download_webpage(app_js_url, channel_code, 'app js') + app_id = self._search_regex( + r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]', + app_js, 'app id', default=self._APP_ID) + else: + app_id = self._APP_ID + + channel_info = self._download_json( + 'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode', + 
channel_code, note='decode channel code', + query={'app_id': app_id, 'channelCode': channel_code, '_': int(time.time())}) + + channel_seq = channel_info['result']['channelSeq'] + channel_name = None + entries = [] + + for page_num in itertools.count(1): + video_list = self._download_json( + 'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList', + channel_code, note='channel list %d' % page_num, + query={ + 'app_id': app_id, + 'channelSeq': channel_seq, + 'maxNumOfRows': 1000, + '_': int(time.time()), + 'pageNo': page_num + } + ) + if not channel_name: + channel_name = video_list['result']['channelInfo']['channelName'] + + if not video_list['result'].get('videoList'): + break + + for video in video_list['result']['videoList']: + video_id = str(video['videoSeq']) + entries.append( + self.url_result( + 'http://www.vlive.tv/video/%s' % video_id, 'Vlive', video_id)) + + return self.playlist_result( + entries, channel_code, channel_name) From 661cc229d2e885dd303d26535477c8905805ddf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 19:08:01 +0700 Subject: [PATCH 0103/1696] [vlive:channel] Improve --- youtube_dl/extractor/vlive.py | 62 ++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 70bab1f04..b9718901b 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -6,15 +6,19 @@ import time import itertools from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_urlencode, + compat_str, +) from ..utils import ( dict_get, ExtractorError, float_or_none, int_or_none, remove_start, + try_get, urlencode_postdata, ) -from ..compat import compat_urllib_parse_urlencode class VLiveIE(InfoExtractor): @@ -175,9 +179,9 @@ class VLiveIE(InfoExtractor): class VLiveChannelIE(InfoExtractor): IE_NAME = 'vlive:channel' - _VALID_URL = r'https?://channels\.vlive\.tv/(?P[0-9A-Z]+)/video' + _VALID_URL = r'https?://channels\.vlive\.tv/(?P[0-9A-Z]+)' _TEST = { - 'url': 'http://channels.vlive.tv/FCD4B/video', + 'url': 'http://channels.vlive.tv/FCD4B', 'info_dict': { 'id': 'FCD4B', 'title': 'MAMAMOO', @@ -191,21 +195,31 @@ class VLiveChannelIE(InfoExtractor): webpage = self._download_webpage( 'http://channels.vlive.tv/%s/video' % channel_code, channel_code) + + app_id = None + app_js_url = self._search_regex( - r'(http[^\'\"\s]+app\.js)', webpage, 'app js', default='') + r']+src=(["\'])(?Phttp.+?/app\.js.*?)\1', + webpage, 'app js', default=None, group='url') if app_js_url: - app_js = self._download_webpage(app_js_url, channel_code, 'app js') - app_id = self._search_regex( - r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]', - app_js, 'app id', default=self._APP_ID) - else: - app_id = self._APP_ID + app_js = self._download_webpage( + app_js_url, channel_code, 'Downloading app JS', fatal=False) + if app_js: + app_id = self._search_regex( + r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]', + app_js, 'app id', default=None) + + app_id = app_id or self._APP_ID channel_info = self._download_json( 'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode', - channel_code, note='decode channel code', - query={'app_id': app_id, 'channelCode': channel_code, '_': int(time.time())}) + channel_code, note='Downloading decode channel code', + query={ + 'app_id': app_id, + 'channelCode': channel_code, + '_': int(time.time()) + }) channel_seq = channel_info['result']['channelSeq'] channel_name = None @@ -214,7 +228,7 @@ class 
VLiveChannelIE(InfoExtractor): for page_num in itertools.count(1): video_list = self._download_json( 'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList', - channel_code, note='channel list %d' % page_num, + channel_code, note='Downloading channel list page #%d' % page_num, query={ 'app_id': app_id, 'channelSeq': channel_seq, @@ -223,17 +237,27 @@ class VLiveChannelIE(InfoExtractor): 'pageNo': page_num } ) - if not channel_name: - channel_name = video_list['result']['channelInfo']['channelName'] - if not video_list['result'].get('videoList'): + if not channel_name: + channel_name = try_get( + video_list, + lambda x: x['result']['channelInfo']['channelName'], + compat_str) + + videos = try_get( + video_list, lambda x: x['result']['videoList'], list) + if not videos: break - for video in video_list['result']['videoList']: - video_id = str(video['videoSeq']) + for video in videos: + video_id = video.get('videoSeq') + if not video_id: + continue + video_id = compat_str(video_id) entries.append( self.url_result( - 'http://www.vlive.tv/video/%s' % video_id, 'Vlive', video_id)) + 'http://www.vlive.tv/video/%s' % video_id, + ie=VLiveIE.ie_key(), video_id=video_id)) return self.playlist_result( entries, channel_code, channel_name) From 008f247077027f10c947060d8f3bb886c9af6aa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 20:29:22 +0700 Subject: [PATCH 0104/1696] [mtv81] Add extractor (closes #7619) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/mtv.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c781c9b87..915291f74 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -553,6 +553,7 @@ from .mtv import ( MTVVideoIE, MTVServicesEmbeddedIE, MTVDEIE, + MTV81IE, ) from .muenchentv import MuenchenTVIE from .musicplayon import MusicPlayOnIE diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 855c3996f..8acea1461 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -327,6 +327,35 @@ class MTVIE(MTVServicesInfoExtractor): }] +class MTV81IE(InfoExtractor): + IE_NAME = 'mtv81' + _VALID_URL = r'https?://(?:www\.)?mtv81\.com/videos/(?P[^/?#.]+)' + + _TEST = { + 'url': 'http://www.mtv81.com/videos/artist-to-watch/the-godfather-of-japanese-hip-hop-segment-1/', + 'md5': '1edbcdf1e7628e414a8c5dcebca3d32b', + 'info_dict': { + 'id': '5e14040d-18a4-47c4-a582-43ff602de88e', + 'ext': 'mp4', + 'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer', + 'description': '"Unlocking the Truth" premieres August 17th at 11/10c.', + 'timestamp': 1468846800, + 'upload_date': '20160718', + }, + } + + def _extract_mgid(self, webpage): + return self._search_regex( + r'getTheVideo\((["\'])(?Pmgid:.+?)\1', webpage, + 'mgid', group='id') + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + mgid = self._extract_mgid(webpage) + return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) + + class MTVVideoIE(MTVServicesInfoExtractor): IE_NAME = 'mtv:video' _VALID_URL = r'''(?x)^https?:// From 732fb3f8be6cca47c60b3befee83ee9b5002984d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 21:06:22 +0700 Subject: [PATCH 0105/1696] [options] Move --abort-on-unavailable-fragment to download section --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 
1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 0d2ce8d15..5e2936555 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -450,7 +450,7 @@ def parseOpts(overrideArguments=None): '--skip-unavailable-fragments', action='store_true', dest='skip_unavailable_fragments', default=True, help='Skip unavailable fragments (DASH and hlsnative only)') - general.add_option( + downloader.add_option( '--abort-on-unavailable-fragment', action='store_false', dest='skip_unavailable_fragments', help='Abort downloading when some fragment is not available') From a71b8d3b3bb399acb82f3ccfbd8a19d411848db4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 28 Jan 2017 15:51:52 +0100 Subject: [PATCH 0106/1696] [itv] Add new extractor(closes #9240) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/itv.py | 181 +++++++++++++++++++++++++++++ 2 files changed, 182 insertions(+) create mode 100644 youtube_dl/extractor/itv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 915291f74..086a2296d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -415,6 +415,7 @@ from .internetvideoarchive import InternetVideoArchiveIE from .iprima import IPrimaIE from .iqiyi import IqiyiIE from .ir90tv import Ir90TvIE +from .itv import ITVIE from .ivi import ( IviIE, IviCompilationIE diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py new file mode 100644 index 000000000..d029609c3 --- /dev/null +++ b/youtube_dl/extractor/itv.py @@ -0,0 +1,181 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import uuid +import xml.etree.ElementTree as etree +import json + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + extract_attributes, + xpath_with_ns, + xpath_element, + xpath_text, + int_or_none, + parse_duration, + ExtractorError, + determine_ext, +) + + +class ITVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P[0-9a-z]+)' + _TEST = { + 'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053', + 'info_dict': { + 'id': '2a2936a0053', + 'ext': 'flv', + 'title': 'Home Movie', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + params = extract_attributes(self._search_regex( + r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params')) + + ns_map = { + 'soapenv': 'http://schemas.xmlsoap.org/soap/envelope/', + 'tem': 'http://tempuri.org/', + 'itv': 'http://schemas.datacontract.org/2004/07/Itv.BB.Mercury.Common.Types', + 'com': 'http://schemas.itv.com/2009/05/Common', + } + for ns, full_ns in ns_map.items(): + etree.register_namespace(ns, full_ns) + + def _add_ns(name): + return xpath_with_ns(name, ns_map) + + def _add_sub_element(element, name): + return etree.SubElement(element, _add_ns(name)) + + req_env = etree.Element(_add_ns('soapenv:Envelope')) + _add_sub_element(req_env, 'soapenv:Header') + body = _add_sub_element(req_env, 'soapenv:Body') + get_playlist = _add_sub_element(body, ('tem:GetPlaylist')) + request = _add_sub_element(get_playlist, 'tem:request') + _add_sub_element(request, 'itv:ProductionId').text = params['data-video-id'] + _add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper() + vodcrid = _add_sub_element(request, 'itv:Vodcrid') + _add_sub_element(vodcrid, 'com:Id') + _add_sub_element(request, 'itv:Partition') + 
user_info = _add_sub_element(get_playlist, 'tem:userInfo') + _add_sub_element(user_info, 'itv:Broadcaster').text = 'Itv' + _add_sub_element(user_info, 'itv:DM') + _add_sub_element(user_info, 'itv:RevenueScienceValue') + _add_sub_element(user_info, 'itv:SessionId') + _add_sub_element(user_info, 'itv:SsoToken') + _add_sub_element(user_info, 'itv:UserToken') + site_info = _add_sub_element(get_playlist, 'tem:siteInfo') + _add_sub_element(site_info, 'itv:AdvertisingRestriction').text = 'None' + _add_sub_element(site_info, 'itv:AdvertisingSite').text = 'ITV' + _add_sub_element(site_info, 'itv:AdvertisingType').text = 'Any' + _add_sub_element(site_info, 'itv:Area').text = 'ITVPLAYER.VIDEO' + _add_sub_element(site_info, 'itv:Category') + _add_sub_element(site_info, 'itv:Platform').text = 'DotCom' + _add_sub_element(site_info, 'itv:Site').text = 'ItvCom' + device_info = _add_sub_element(get_playlist, 'tem:deviceInfo') + _add_sub_element(device_info, 'itv:ScreenSize').text = 'Big' + player_info = _add_sub_element(get_playlist, 'tem:playerInfo') + _add_sub_element(player_info, 'itv:Version').text = '2' + + headers = self.geo_verification_headers() + headers.update({ + 'Content-Type': 'text/xml; charset=utf-8', + 'SOAPAction': 'http://tempuri.org/PlaylistService/GetPlaylist', + }) + resp_env = self._download_xml( + params['data-playlist-url'], video_id, + headers=headers, data=etree.tostring(req_env)) + playlist = xpath_element(resp_env, './/Playlist') + if playlist is None: + fault_string = xpath_text(resp_env, './/faultstring') + raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string)) + title = xpath_text(playlist, 'EpisodeTitle', fatal=True) + media_files = xpath_element(playlist, 'VideoEntries/Video/MediaFiles', fatal=True) + rtmp_url = media_files.attrib['base'] + + formats = [] + for media_file in media_files.findall('MediaFile'): + play_path = xpath_text(media_file, 'URL') + if not play_path: + continue + tbr = int_or_none(media_file.get('bitrate'), 1000) + formats.append({ + 'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''), + 'url': rtmp_url, + 'play_path': play_path, + 'tbr': tbr, + 'ext': 'flv', + }) + + ios_playlist_url = params.get('data-video-playlist') + hmac = params.get('data-video-hmac') + if ios_playlist_url and hmac: + headers = self.geo_verification_headers() + headers.update({ + 'Accept': 'application/vnd.itv.vod.playlist.v2+json', + 'Content-Type': 'application/json', + 'hmac': hmac.upper(), + }) + ios_playlist = self._download_json( + ios_playlist_url, video_id, data=json.dumps({ + 'user': { + 'itvUserId': '', + 'entitlements': [], + 'token': '' + }, + 'device': { + 'manufacturer': 'Apple', + 'model': 'iPad', + 'os': { + 'name': 'iPhone OS', + 'version': '9.3', + 'type': 'ios' + } + }, + 'client': { + 'version': '4.1', + 'id': 'browser' + }, + 'variantAvailability': { + 'featureset': { + 'min': ['hls', 'aes'], + 'max': ['hls', 'aes'] + }, + 'platformTag': 'mobile' + } + }).encode(), headers=headers, fatal=False) + if ios_playlist: + video_data = ios_playlist.get('Playlist', {}).get('Video', {}) + ios_base_url = video_data.get('Base') + for media_file in video_data.get('MediaFiles', []): + href = media_file.get('Href') + if not href: + continue + if ios_base_url: + href = ios_base_url + href + ext = determine_ext(href) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats(href, video_id, 'mp4', m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': href, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 
'formats': formats, + 'episode_title': title, + 'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')), + 'series': xpath_text(playlist, 'ProgrammeTitle'), + 'duartion': parse_duration(xpath_text(playlist, 'Duration')), + } From 24ee6b9721770b7066f10f6a6773f1ce15f82ed0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 22:40:07 +0700 Subject: [PATCH 0107/1696] [options] Remove experimental mark from some options --- youtube_dl/options.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 5e2936555..09c9387ca 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -216,23 +216,23 @@ def parseOpts(overrideArguments=None): network.add_option( '--source-address', metavar='IP', dest='source_address', default=None, - help='Client-side IP address to bind to (experimental)', + help='Client-side IP address to bind to', ) network.add_option( '-4', '--force-ipv4', action='store_const', const='0.0.0.0', dest='source_address', - help='Make all connections via IPv4 (experimental)', + help='Make all connections via IPv4', ) network.add_option( '-6', '--force-ipv6', action='store_const', const='::', dest='source_address', - help='Make all connections via IPv6 (experimental)', + help='Make all connections via IPv6', ) network.add_option( '--geo-verification-proxy', dest='geo_verification_proxy', default=None, metavar='URL', help='Use this proxy to verify the IP address for some geo-restricted sites. ' - 'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading. (experimental)' + 'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading.' ) network.add_option( '--cn-verification-proxy', @@ -297,7 +297,7 @@ def parseOpts(overrideArguments=None): '--match-filter', metavar='FILTER', dest='match_filter', default=None, help=( - 'Generic video filter (experimental). ' + 'Generic video filter. 
' 'Specify any key (see help for -o for a list of available keys) to' ' match if the key is present, ' '!key to check if the key is not present,' From f592ff98683794e0f79c96cbec67b737ae8da00c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 28 Jan 2017 17:25:15 +0100 Subject: [PATCH 0108/1696] [itv] extract subtitles --- youtube_dl/extractor/itv.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index d029609c3..d65cdc6af 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -20,7 +20,7 @@ from ..utils import ( class ITVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P[0-9a-z]+)' + _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P[0-9a-zA-Z]+)' _TEST = { 'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053', 'info_dict': { @@ -98,7 +98,8 @@ class ITVIE(InfoExtractor): fault_string = xpath_text(resp_env, './/faultstring') raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string)) title = xpath_text(playlist, 'EpisodeTitle', fatal=True) - media_files = xpath_element(playlist, 'VideoEntries/Video/MediaFiles', fatal=True) + video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) + media_files = xpath_element(video_element, 'MediaFiles', fatal=True) rtmp_url = media_files.attrib['base'] formats = [] @@ -170,10 +171,21 @@ class ITVIE(InfoExtractor): }) self._sort_formats(formats) + subtitles = {} + for caption_url in video_element.findall('ClosedCaptioningURIs/URL'): + if not caption_url.text: + continue + ext = determine_ext(caption_url.text, 'ttml') + subtitles.setdefault('en', []).append({ + 'url': caption_url, + 'ext': 'ttml' if ext == 'xml' else ext, + }) + return { 'id': video_id, 'title': title, 'formats': formats, + 'subtitles': subtitles, 'episode_title': title, 'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')), 'series': xpath_text(playlist, 'ProgrammeTitle'), From 4edeac5bfae76966fd14f636bd68850ea0403ece Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 28 Jan 2017 17:28:18 +0100 Subject: [PATCH 0109/1696] [itv] fix subtitle extraction --- youtube_dl/extractor/itv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index d65cdc6af..0328c7093 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -177,7 +177,7 @@ class ITVIE(InfoExtractor): continue ext = determine_ext(caption_url.text, 'ttml') subtitles.setdefault('en', []).append({ - 'url': caption_url, + 'url': caption_url.text, 'ext': 'ttml' if ext == 'xml' else ext, }) From acbb2374bce27eda16764b80832f88cf833a51e5 Mon Sep 17 00:00:00 2001 From: Costy Petrisor Date: Sun, 1 May 2016 12:34:11 +0000 Subject: [PATCH 0110/1696] added --autonumber-start NUMBER as a command line option to be able to offset the index at which autonumber formats filenames --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/__init__.py | 1 + youtube_dl/options.py | 4 ++++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 41d9a63ee..c71e94518 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -584,7 +584,7 @@ class YoutubeDL(object): if autonumber_size is None: autonumber_size = 5 autonumber_templ = '%0' + str(autonumber_size) + 'd' - template_dict['autonumber'] = autonumber_templ % self._num_downloads + template_dict['autonumber'] = autonumber_templ % 
(self.params.get('autonumber_start', 1) - 1 + self._num_downloads) if template_dict.get('playlist_index') is not None: template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index']) if template_dict.get('resolution') is None: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index dfa4ae839..577bc880f 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -321,6 +321,7 @@ def _real_main(argv=None): 'listformats': opts.listformats, 'outtmpl': outtmpl, 'autonumber_size': opts.autonumber_size, + 'autonumber_start': opts.autonumber_start, 'restrictfilenames': opts.restrictfilenames, 'ignoreerrors': opts.ignoreerrors, 'force_generic_extractor': opts.force_generic_extractor, diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 09c9387ca..571525434 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -663,6 +663,10 @@ def parseOpts(overrideArguments=None): '--autonumber-size', dest='autonumber_size', metavar='NUMBER', help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given') + filesystem.add_option( + '--autonumber-start', + dest='autonumber_start', metavar='NUMBER', type="int", default=1, + help='Specify the start value for the %(autonumber)s counter. Defaults to 1.') filesystem.add_option( '--restrict-filenames', action='store_true', dest='restrictfilenames', default=False, From 1a241a2d02e2507219e81d7b18c18f10937ae6e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 23:57:56 +0700 Subject: [PATCH 0111/1696] [options] Refactor autonumber options and add validation (closes #727, closes #2702, closes #9362) --- youtube_dl/__init__.py | 6 ++++++ youtube_dl/options.py | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 577bc880f..2b156342a 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -133,6 +133,12 @@ def _real_main(argv=None): parser.error('TV Provider account username missing\n') if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid): parser.error('using output template conflicts with using title, video ID or auto number') + if opts.autonumber_size is not None: + if opts.autonumber_size <= 0: + parser.error('auto number size must be positive') + if opts.autonumber_start is not None: + if opts.autonumber_start < 0: + parser.error('auto number start must be positive or 0') if opts.usetitle and opts.useid: parser.error('using title conflicts with using video ID') if opts.username is not None and opts.password is None: diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 571525434..3abf621c0 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -661,12 +661,12 @@ def parseOpts(overrideArguments=None): help=('Output filename template, see the "OUTPUT TEMPLATE" for all the info')) filesystem.add_option( '--autonumber-size', - dest='autonumber_size', metavar='NUMBER', - help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given') + dest='autonumber_size', metavar='NUMBER', default=5, type=int, + help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given (default is %default)') filesystem.add_option( '--autonumber-start', - dest='autonumber_start', metavar='NUMBER', type="int", default=1, - 
help='Specify the start value for the %(autonumber)s counter. Defaults to 1.') + dest='autonumber_start', metavar='NUMBER', default=1, type=int, + help='Specify the start value for %(autonumber)s (default is %default)') filesystem.add_option( '--restrict-filenames', action='store_true', dest='restrictfilenames', default=False, From c0af11abeeaad75f4387ad77adc751715dfc0cf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 00:52:23 +0700 Subject: [PATCH 0112/1696] Credit @AVerwer for showroomlive (#11458) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 9e092cccc..90ff4d789 100644 --- a/AUTHORS +++ b/AUTHORS @@ -191,3 +191,4 @@ Rich Leeper Zhong Jianxin Thor77 Mattias Wadman +Arjan Verwer From ffcfb7e3e01cec5f5468e4639b2e4d44a0c7bfba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 00:54:31 +0700 Subject: [PATCH 0113/1696] Credit @costypetrisor for autonumber start (#9362) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 90ff4d789..6b6e38613 100644 --- a/AUTHORS +++ b/AUTHORS @@ -192,3 +192,4 @@ Zhong Jianxin Thor77 Mattias Wadman Arjan Verwer +Costy Petrisor From 34cea6137e6df158c99d83fd1c1af55f94ee4a38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 00:57:15 +0700 Subject: [PATCH 0114/1696] Credit @einstein95 for pornflip (#11795) and chaturbate fix (#11797) --- AUTHORS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/AUTHORS b/AUTHORS index 6b6e38613..600e2c55b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -193,3 +193,5 @@ Thor77 Mattias Wadman Arjan Verwer Costy Petrisor +Logan B + From 186f4abe938e0f631b63c5dc1aaa4d622513a366 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 00:59:17 +0700 Subject: [PATCH 0115/1696] Credit @goggle for 20min (#11683) and azmedien (#11805) --- AUTHORS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 600e2c55b..b3193f7da 100644 --- a/AUTHORS +++ b/AUTHORS @@ -194,4 +194,4 @@ Mattias Wadman Arjan Verwer Costy Petrisor Logan B - +Alex Seiler From f5169501d2749503e5d19f9c51937aedcce357e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 01:00:17 +0700 Subject: [PATCH 0116/1696] Credit @sudovijay for openload fix (#11646) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index b3193f7da..434217abb 100644 --- a/AUTHORS +++ b/AUTHORS @@ -195,3 +195,4 @@ Arjan Verwer Costy Petrisor Logan B Alex Seiler +Vijay Singh From 4d07b748c2e8057fa6417ab5422cb19be313d7b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 01:01:39 +0700 Subject: [PATCH 0117/1696] Credit @bastik for zdf fix (#11063) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 434217abb..49ffc99aa 100644 --- a/AUTHORS +++ b/AUTHORS @@ -196,3 +196,4 @@ Costy Petrisor Logan B Alex Seiler Vijay Singh +Paul Hartmann From 59c307891ac2cca2b2db42a534a3f4de61820450 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 01:02:28 +0700 Subject: [PATCH 0118/1696] Credit @RPing for cntv (#8541) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 49ffc99aa..78a7a5291 100644 --- a/AUTHORS +++ b/AUTHORS @@ -197,3 +197,4 @@ Logan B Alex Seiler Vijay Singh Paul Hartmann +Stephen Chen From 0842b8241d9f8984dd70266b59aa68241259401f Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 01:03:59 +0700 Subject: [PATCH 0119/1696] Credit @fast90 for config location (#10648) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 78a7a5291..022a5de84 100644 --- a/AUTHORS +++ b/AUTHORS @@ -198,3 +198,4 @@ Alex Seiler Vijay Singh Paul Hartmann Stephen Chen +Fabian Stahl From 56fc078da84a7f26d8290b2b425cc2da66a5975a Mon Sep 17 00:00:00 2001 From: Andre Walker Date: Sat, 28 Jan 2017 16:19:38 +0100 Subject: [PATCH 0120/1696] [npo] Update subtitles url NPO websites changed the domain they used for subtitles, from e.omroep.nl to tt888.omroep.nl. --- youtube_dl/extractor/npo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index c91f58461..962437145 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -241,7 +241,7 @@ class NPOIE(NPOBaseIE): if metadata.get('tt888') == 'ja': subtitles['nl'] = [{ 'ext': 'vtt', - 'url': 'http://e.omroep.nl/tt888/%s' % video_id, + 'url': 'http://tt888.omroep.nl/tt888/%s' % video_id, }] return { From 76aaf1faaed613569cb71e4f9aa7bd218f27c54b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 03:43:46 +0700 Subject: [PATCH 0121/1696] Credit @BagiraHun for videa (#11133) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 022a5de84..3ef2800c9 100644 --- a/AUTHORS +++ b/AUTHORS @@ -199,3 +199,4 @@ Vijay Singh Paul Hartmann Stephen Chen Fabian Stahl +Bagira From d04621daf451d601dba80dc0f2baa29e404e4ca6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 05:36:53 +0700 Subject: [PATCH 0122/1696] [extractor/common] Fix duration per dash segment (closes #11868) --- youtube_dl/extractor/common.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index dce8c7d0d..a3048fb59 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1751,14 +1751,16 @@ class InfoExtractor(object): # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI # or any YouTube dashsegments video fragments = [] - s_num = 0 - for segment_url in representation_ms_info['segment_urls']: - s = representation_ms_info['s'][s_num] + segment_index = 0 + timescale = representation_ms_info['timescale'] + for s in representation_ms_info['s']: + duration = float_or_none(s['d'], timescale) for r in range(s.get('r', 0) + 1): fragments.append({ - 'url': segment_url, - 'duration': float_or_none(s['d'], representation_ms_info['timescale']), + 'url': representation_ms_info['segment_urls'][segment_index], + 'duration': duration, }) + segment_index += 1 representation_ms_info['fragments'] = fragments # NB: MPD manifest may contain direct URLs to unfragmented media. # No fragments key is present in this case. From c58c2d63cbde07af66885829b7c3dbcdfbc096dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 05:56:43 +0700 Subject: [PATCH 0123/1696] [extractor/common] Document forgotten fragment base and path interfaces --- youtube_dl/extractor/common.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a3048fb59..fb484b6f2 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -121,9 +121,19 @@ class InfoExtractor(object): download, lower-case. 
"http", "https", "rtsp", "rtmp", "rtmpe", "m3u8", "m3u8_native" or "http_dash_segments". - * fragments A list of fragments of the fragmented media, - with the following entries: - * "url" (mandatory) - fragment's URL + * fragment_base_url + Base URL for fragments. Each fragment's path + value (if present) will be relative to + this URL. + * fragments A list of fragments of a fragmented media. + Each fragment entry must contain either an url + or a path. If an url is present it should be + considered by a client. Otherwise both path and + fragment_base_url must be present. Here is + the list of all potential fields: + * "url" - fragment's URL + * "path" - fragment's path relative to + fragment_base_url * "duration" (optional, int or float) * "filesize" (optional, int) * preference Order number of this format. If this field is From e228616c6e73561f0c6d32d6b681bbba321c06aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jan 2017 06:57:39 +0700 Subject: [PATCH 0124/1696] [extractor/common] Fix initialization template (closes #11605, closes #11825) --- youtube_dl/extractor/common.py | 48 ++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index fb484b6f2..5a15a9536 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1637,12 +1637,12 @@ class InfoExtractor(object): segment_template = element.find(_add_ns('SegmentTemplate')) if segment_template is not None: extract_common(segment_template) - media_template = segment_template.get('media') - if media_template: - ms_info['media_template'] = media_template + media = segment_template.get('media') + if media: + ms_info['media'] = media initialization = segment_template.get('initialization') if initialization: - ms_info['initialization_url'] = initialization + ms_info['initialization'] = initialization else: extract_Initialization(segment_template) return ms_info @@ -1686,6 +1686,7 @@ class InfoExtractor(object): lang = representation_attrib.get('lang') url_el = representation.find(_add_ns('BaseURL')) filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None) + bandwidth = int_or_none(representation_attrib.get('bandwidth')) f = { 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, 'url': base_url, @@ -1693,7 +1694,7 @@ class InfoExtractor(object): 'ext': mimetype2ext(mime_type), 'width': int_or_none(representation_attrib.get('width')), 'height': int_or_none(representation_attrib.get('height')), - 'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000), + 'tbr': int_or_none(bandwidth, 1000), 'asr': int_or_none(representation_attrib.get('audioSamplingRate')), 'fps': int_or_none(representation_attrib.get('frameRate')), 'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None, @@ -1702,13 +1703,32 @@ class InfoExtractor(object): } f.update(parse_codecs(representation_attrib.get('codecs'))) representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) - if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info: - media_template = representation_ms_info['media_template'] - media_template = media_template.replace('$RepresentationID$', representation_id) - media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template) - media_template = re.sub(r'\$(Number|Bandwidth|Time)%([^$]+)\$', 
r'%(\1)\2', media_template) - media_template.replace('$$', '$') + def prepare_template(template_name, identifiers): + t = representation_ms_info[template_name] + t = t.replace('$RepresentationID$', representation_id) + t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t) + t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t) + t.replace('$$', '$') + return t + + # @initialization is a regular template like @media one + # so it should be handled just the same way (see + # https://github.com/rg3/youtube-dl/issues/11605) + if 'initialization' in representation_ms_info: + initialization_template = prepare_template( + 'initialization', + # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and + # $Time$ shall not be included for @initialization thus + # only $Bandwidth$ remains + ('Bandwidth', )) + representation_ms_info['initialization_url'] = initialization_template % { + 'Bandwidth': bandwidth, + } + + if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info: + + media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time')) # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$ # can't be used at the same time @@ -1720,7 +1740,7 @@ class InfoExtractor(object): representation_ms_info['fragments'] = [{ 'url': media_template % { 'Number': segment_number, - 'Bandwidth': int_or_none(representation_attrib.get('bandwidth')), + 'Bandwidth': bandwidth, }, 'duration': segment_duration, } for segment_number in range( @@ -1738,7 +1758,7 @@ class InfoExtractor(object): def add_segment_url(): segment_url = media_template % { 'Time': segment_time, - 'Bandwidth': int_or_none(representation_attrib.get('bandwidth')), + 'Bandwidth': bandwidth, 'Number': segment_number, } representation_ms_info['fragments'].append({ @@ -1780,7 +1800,7 @@ class InfoExtractor(object): 'protocol': 'http_dash_segments', }) if 'initialization_url' in representation_ms_info: - initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id) + initialization_url = representation_ms_info['initialization_url'] if not f.get('url'): f['url'] = initialization_url f['fragments'].append({'url': initialization_url}) From f13da8af289d7d9365e34ef705a53ac62aa3b570 Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Sat, 28 Jan 2017 17:52:07 +0100 Subject: [PATCH 0125/1696] [azmedien:playlist] Add support for topic and themen playlists --- youtube_dl/extractor/azmedien.py | 39 +++++++++++++++++++++++++----- youtube_dl/extractor/extractors.py | 2 +- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py index a89f71c20..cbc3ed564 100644 --- a/youtube_dl/extractor/azmedien.py +++ b/youtube_dl/extractor/azmedien.py @@ -5,8 +5,9 @@ import re from .common import InfoExtractor from .kaltura import KalturaIE from ..utils import ( - get_element_by_class, + get_element_by_id, strip_or_none, + urljoin, ) @@ -83,8 +84,8 @@ class AZMedienIE(AZMedienBaseIE): return self._kaltura_video(partner_id, entry_id) -class AZMedienShowIE(AZMedienBaseIE): - IE_DESC = 'AZ Medien shows' +class AZMedienPlaylistIE(AZMedienBaseIE): + IE_DESC = 'AZ Medien playlists' _VALID_URL = r'''(?x) https?:// (?:www\.)? @@ -93,7 +94,12 @@ class AZMedienShowIE(AZMedienBaseIE): telebaern\.tv| telem1\.ch )/ - (?P[0-9]+-show-[^/\#]+ + (?P[0-9]+- + (?: + show| + topic| + themen + )-[^/\#]+ (?: /[0-9]+-episode-[^/\#]+ )? 
@@ -108,6 +114,18 @@ class AZMedienShowIE(AZMedienBaseIE): 'title': 'News - Donnerstag, 15. Dezember 2016', }, 'playlist_count': 9, + }, { + # URL with 'themen' + 'url': 'http://www.telem1.ch/258-themen-tele-m1-classics', + 'info_dict': { + 'id': '258-themen-tele-m1-classics', + 'title': 'Tele M1 Classics', + }, + 'playlist_mincount': 15, + }, { + # URL with 'topic', contains nested playlists + 'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen', + 'only_matching': True, }, { # URL with 'show' only 'url': 'http://www.telezueri.ch/86-show-talktaeglich', @@ -136,10 +154,19 @@ class AZMedienShowIE(AZMedienBaseIE): for m in re.finditer( r']+data-real=(["\'])(?Phttp.+?)\1', webpage)] + if not entries: + entries = [ + # May contain nested playlists (e.g. [1]) thus no explicit + # ie_key + # 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen) + self.url_result(urljoin(url, m.group('url'))) + for m in re.finditer( + r']+name=[^>]+href=(["\'])(?P/.+?)\1', webpage)] + title = self._search_regex( r'episodeShareTitle\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, 'title', - default=strip_or_none(get_element_by_class( - 'title-block-cell', webpage)), group='title') + default=strip_or_none(get_element_by_id( + 'video-title', webpage)), group='title') return self.playlist_result(entries, show_id, title) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 086a2296d..2590b5e1b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -82,7 +82,7 @@ from .awaan import ( ) from .azmedien import ( AZMedienIE, - AZMedienShowIE, + AZMedienPlaylistIE, ) from .azubu import AzubuIE, AzubuLiveIE from .baidu import BaiduVideoIE From fe323a4800d67d0ad2fecebcc3b627a7a22be427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 Jan 2017 21:21:26 +0700 Subject: [PATCH 0126/1696] [ChangeLog] Actualize --- ChangeLog | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/ChangeLog b/ChangeLog index 8e5a04b42..ab2818f9e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,24 @@ +version <unreleased> + +Core +* [extractor/common] Fix initialization template (#11605, #11825) ++ [extractor/common] Document fragment_base_url and fragment's path fields +* [extractor/common] Fix duration per DASH segment (#11868) ++ Introduce --autonumber-start option for initial value of %(autonumber)s + template (#727, #2702, #9362, #10457, #10529, #11862) + +Extractors ++ [azmedien:playlist] Add support for topic and themen playlists (#11817) +* [npo] Fix subtitles extraction ++ [itv] Extract subtitles ++ [itv] Add support for itv.com (#9240) ++ [mtv81] Add support for mtv81.com (#7619) ++ [vlive] Add support for channels (#11826) ++ [kaltura] Add fallback for fileExt ++ [kaltura] Improve uploader_id extraction ++ [konserthusetplay] Add support for rspoplay.se (#11828) + + version 2017.01.28 Core From 4d2fdb07c47e2d9f96d58f5fbf3da8665a1144a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 Jan 2017 13:21:42 +0700 Subject: [PATCH 0127/1696] release 2017.01.29 --- .github/ISSUE_TEMPLATE.md | 6 ++--- ChangeLog | 2 +- README.md | 47 +++++++++++++++++++-------------------- docs/supportedsites.md | 5 ++++- youtube_dl/version.py | 2 +- 5 files changed, 32 insertions(+), 30 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 693f3b745..10c982fd0 100644 --- a/.github/ISSUE_TEMPLATE.md +++ 
b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.28** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.29*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.29** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.28 +[debug] youtube-dl version 2017.01.29 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index ab2818f9e..cd7017f6d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.01.29 Core * [extractor/common] Fix initialization template (#11605, #11825) diff --git a/README.md b/README.md index 4f677d0cc..2ee00f515 100644 --- a/README.md +++ b/README.md @@ -88,8 +88,6 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo --mark-watched Mark videos watched (YouTube only) --no-mark-watched Do not mark videos watched (YouTube only) --no-color Do not emit color codes in output - --abort-on-unavailable-fragment Abort downloading when some fragment is not - available ## Network Options: --proxy URL Use the specified HTTP/HTTPS/SOCKS proxy. @@ -99,16 +97,13 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo string (--proxy "") for direct connection --socket-timeout SECONDS Time to wait before giving up, in seconds --source-address IP Client-side IP address to bind to - (experimental) -4, --force-ipv4 Make all connections via IPv4 - (experimental) -6, --force-ipv6 Make all connections via IPv6 - (experimental) --geo-verification-proxy URL Use this proxy to verify the IP address for some geo-restricted sites. The default proxy specified by --proxy (or none, if the options is not present) is used for the - actual downloading. (experimental) + actual downloading. ## Video Selection: --playlist-start NUMBER Playlist video to start at (default is 1) @@ -139,23 +134,23 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo COUNT views --max-views COUNT Do not download any videos with more than COUNT views - --match-filter FILTER Generic video filter (experimental). 
- Specify any key (see help for -o for a list - of available keys) to match if the key is - present, !key to check if the key is not - present,key > NUMBER (like "comment_count > - 12", also works with >=, <, <=, !=, =) to - compare against a number, and & to require - multiple matches. Values which are not - known are excluded unless you put a - question mark (?) after the operator.For - example, to only match videos that have - been liked more than 100 times and disliked - less than 50 times (or the dislike - functionality is not available at the given - service), but who also have a description, - use --match-filter "like_count > 100 & - dislike_count <? 50 & description" . + --match-filter FILTER Generic video filter. Specify any key (see + help for -o for a list of available keys) + to match if the key is present, !key to + check if the key is not present,key > + NUMBER (like "comment_count > 12", also + works with >=, <, <=, !=, =) to compare + against a number, and & to require multiple + matches. Values which are not known are + excluded unless you put a question mark (?) + after the operator.For example, to only + match videos that have been liked more than + 100 times and disliked less than 50 times + (or the dislike functionality is not + available at the given service), but who + also have a description, use --match-filter + "like_count > 100 & dislike_count <? 50 & + description" . --no-playlist Download only the video, if the URL refers to a video and a playlist. --yes-playlist Download the playlist, if the URL refers to @@ -178,6 +173,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo only) --skip-unavailable-fragments Skip unavailable fragments (DASH and hlsnative only) + --abort-on-unavailable-fragment Abort downloading when some fragment is not + available --buffer-size SIZE Size of download buffer (e.g. 
1024 or 16K) (default is 1024) --no-resize-buffer Do not automatically adjust the buffer @@ -210,7 +207,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo --autonumber-size NUMBER Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option - is given + is given (default is 5) + --autonumber-start NUMBER Specify the start value for %(autonumber)s + (default is 1) --restrict-filenames Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 6318a862f..d4231577b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -76,7 +76,7 @@ - **awaan:season** - **awaan:video** - **AZMedien**: AZ Medien videos - - **AZMedienShow**: AZ Medien shows + - **AZMedienPlaylist**: AZ Medien playlists - **Azubu** - **AzubuLive** - **BaiduVideo**: 百度视频 @@ -337,6 +337,7 @@ - **IPrima** - **iqiyi**: 爱奇艺 - **Ir90Tv** + - **ITV** - **ivi**: ivi.ru - **ivi:compilation**: ivi.ru compilations - **ivideon**: Ivideon TV @@ -445,6 +446,7 @@ - **mtg**: MTG services - **mtv** - **mtv.de** + - **mtv81** - **mtv:video** - **mtvservices:embedded** - **MuenchenTV**: münchen.tv @@ -887,6 +889,7 @@ - **vk:uservideos**: VK - User's Videos - **vk:wallpost** - **vlive** + - **vlive:channel** - **Vodlocker** - **VODPlatform** - **VoiceRepublic** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c22c410a8..a37a65db9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.28' +__version__ = '2017.01.29' From c2d9c25f818da2e0e622b475ffc714f35df0887c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 29 Jan 2017 16:03:39 +0100 Subject: [PATCH 0128/1696] [compat] add compat_etree_register_namespace --- youtube_dl/compat.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 02abf8c1e..49e3c90e2 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2529,6 +2529,24 @@ else: el.text = el.text.decode('utf-8') return doc +if hasattr(etree, 'register_namespace'): + compat_etree_register_namespace = etree.register_namespace +else: + def compat_etree_register_namespace(prefix, uri): + """Register a namespace prefix. + The registry is global, and any existing mapping for either the + given prefix or the namespace URI will be removed. + *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and + attributes in this namespace will be serialized with prefix if possible. + ValueError is raised if prefix is reserved or is invalid. + """ + if re.match(r"ns\d+$", prefix): + raise ValueError("Prefix format reserved for internal use") + for k, v in list(etree._namespace_map.items()): + if k == uri or v == prefix: + del etree._namespace_map[k] + etree._namespace_map[uri] = prefix + if sys.version_info < (2, 7): # Here comes the crazy part: In 2.6, if the xpath is a unicode, # .//node does not match if a node is a direct child of . ! 
From 4719419951ced20e42cddb26b437908ba636debb Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 29 Jan 2017 16:04:15 +0100 Subject: [PATCH 0129/1696] [itv] fix extraction in python 2.6 --- youtube_dl/extractor/itv.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index 0328c7093..b0d860452 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -6,7 +6,10 @@ import xml.etree.ElementTree as etree import json from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_etree_register_namespace, +) from ..utils import ( extract_attributes, xpath_with_ns, @@ -47,7 +50,7 @@ class ITVIE(InfoExtractor): 'com': 'http://schemas.itv.com/2009/05/Common', } for ns, full_ns in ns_map.items(): - etree.register_namespace(ns, full_ns) + compat_etree_register_namespace(ns, full_ns) def _add_ns(name): return xpath_with_ns(name, ns_map) From dadb836139f070da9364439bf3b148eec8bc0b11 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 30 Jan 2017 09:32:31 +0100 Subject: [PATCH 0130/1696] [ruutu] extract dash formats --- youtube_dl/extractor/ruutu.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index f12bc5614..20d01754a 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -81,6 +81,9 @@ class RuutuIE(InfoExtractor): elif ext == 'f4m': formats.extend(self._extract_f4m_formats( video_url, video_id, f4m_id='hds', fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + video_url, video_id, mpd_id='dash', fatal=False)) else: proto = compat_urllib_parse_urlparse(video_url).scheme if not child.tag.startswith('HTTP') and proto != 'rtmp': From 75822ca7909d7f7e15694f73b05b2bf0f1fa61f3 Mon Sep 17 00:00:00 2001 From: Thomas Christlieb <thomaschristlieb@hotmail.com> Date: Tue, 31 Jan 2017 10:03:31 +0100 Subject: [PATCH 0131/1696] New parameter --playlist-random to randomize playlist download order. Fixes #11889 --- youtube_dl/YoutubeDL.py | 5 +++++ youtube_dl/__init__.py | 1 + youtube_dl/options.py | 4 ++++ 3 files changed, 10 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c71e94518..a7bf5a1b0 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -24,6 +24,7 @@ import sys import time import tokenize import traceback +import random from .compat import ( compat_basestring, @@ -159,6 +160,7 @@ class YoutubeDL(object): playlistend: Playlist item to end at. playlist_items: Specific indices of playlist to download. playlistreverse: Download playlist items in reverse order. + playlistrandom: Download playlist items in random order. matchtitle: Download only matching titles. rejecttitle: Reject downloads for matching titles. logger: Log messages to a logging.Logger instance. 
@@ -842,6 +844,9 @@ class YoutubeDL(object): if self.params.get('playlistreverse', False): entries = entries[::-1] + if self.params.get('playlistrandom', False): + random.shuffle(entries) + for i, entry in enumerate(entries, 1): self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) extra = { diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 2b156342a..5c5b8094b 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -344,6 +344,7 @@ def _real_main(argv=None): 'playliststart': opts.playliststart, 'playlistend': opts.playlistend, 'playlistreverse': opts.playlist_reverse, + 'playlistrandom': opts.playlist_random, 'noplaylist': opts.noplaylist, 'logtostderr': opts.outtmpl == '-', 'consoletitle': opts.consoletitle, diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 3abf621c0..349f44778 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -470,6 +470,10 @@ def parseOpts(overrideArguments=None): '--playlist-reverse', action='store_true', help='Download playlist videos in reverse order') + downloader.add_option( + '--playlist-random', + action='store_true', + help='Download playlist videos in random order') downloader.add_option( '--xattr-set-filesize', dest='xattr_set_filesize', action='store_true', From ae9a173b6421a3fdf70dd50d2dc0386f8861fe71 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 31 Jan 2017 14:47:56 +0100 Subject: [PATCH 0132/1696] [vimeo] extract both mixed and separated dash formats --- youtube_dl/extractor/vimeo.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index c12eeadd4..8b6a5cc3c 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -142,10 +142,19 @@ class VimeoBaseInfoExtractor(InfoExtractor): note='Downloading %s m3u8 information' % cdn_name, fatal=False)) elif files_type == 'dash': - formats.extend(self._extract_mpd_formats( - manifest_url.replace('/master.json', '/master.mpd'), video_id, format_id, - 'Downloading %s MPD information' % cdn_name, - fatal=False)) + mpd_pattern = r'/%s/(?:sep/)?video/' % video_id + mpd_manifest_urls = [] + if re.search(mpd_pattern, manifest_url): + for suffix, repl in (('', 'video'), ('_sep', 'sep/video')): + mpd_manifest_urls.append((format_id + suffix, re.sub( + mpd_pattern, '/%s/%s/' % (video_id, repl), manifest_url))) + else: + mpd_manifest_urls = [(format_id, manifest_url)] + for f_id, m_url in mpd_manifest_urls: + formats.extend(self._extract_mpd_formats( + m_url.replace('/master.json', '/master.mpd'), video_id, f_id, + 'Downloading %s MPD information' % cdn_name, + fatal=False)) subtitles = {} text_tracks = config['request'].get('text_tracks') From 3c90cc8b6fc069930264b41f5505dc34c1077442 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 31 Jan 2017 22:19:29 +0700 Subject: [PATCH 0133/1696] [youtube] Fix extraction for domainless player URLs Closes #11890 Closes #11891 Closes #11892 Closes #11894 Closes #11895 Closes #11897 Closes #11900 Closes #11903 Closes #11904 Closes #11906 Closes #11907 Closes #11909 Closes #11913 Closes #11914 Closes #11915 Closes #11916 Closes #11917 Closes #11918 Closes #11919 --- youtube_dl/extractor/youtube.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 630586796..ea398bcc8 100644 --- a/youtube_dl/extractor/youtube.py +++ 
b/youtube_dl/extractor/youtube.py @@ -1028,8 +1028,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _parse_sig_js(self, jscode): funcname = self._search_regex( - r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode, - 'Initial JS player signature function name') + (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\('), + jscode, 'Initial JS player signature function name', group='sig') jsi = JSInterpreter(jscode) initial_function = jsi.extract_function(funcname) @@ -1050,6 +1051,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if player_url.startswith('//'): player_url = 'https:' + player_url + elif not re.match(r'https?://', player_url): + player_url = compat_urlparse.urljoin( + 'https://www.youtube.com', player_url) try: player_id = (player_url, self._signature_cache_id(s)) if player_id not in self._player_cache: From 3a528ffd8944417c99b139da18d0dff907ade517 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 31 Jan 2017 22:21:54 +0700 Subject: [PATCH 0134/1696] [ChangeLog] Actualize --- ChangeLog | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ChangeLog b/ChangeLog index cd7017f6d..e331acacc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +version <unreleased> + +Core ++ [compat] Add compat_etree_register_namespace + +Extractors +* [youtube] Fix extraction for domainless player URLs (#11890, #11891, #11892, + #11894, #11895, #11897, #11900, #11903, #11904, #11906, #11907, #11909, + #11913, #11914, #11915, #11916, #11917, #11918, #11919) ++ [vimeo] Extract both mixed and separated DASH formats ++ [ruutu] Extract DASH formats +* [itv] Fix extraction for python 2.6 + + version 2017.01.29 Core From d7e215b42dcaf71298a7e1dc953cf93523b3da81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 31 Jan 2017 22:24:45 +0700 Subject: [PATCH 0135/1696] release 2017.01.31 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 10c982fd0..180013f72 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.29*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.29** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.31*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.31** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.29 +[debug] youtube-dl version 2017.01.31 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e331acacc..d5ab0e0a7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.01.31 Core + [compat] Add compat_etree_register_namespace diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a37a65db9..fee0ac7c5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.29' +__version__ = '2017.01.31' From 8fd65faece98139def3a6538e98053bebd400263 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Odd=20Str=C3=A5b=C3=B8?= <oddstr13@openshell.no> Date: Sat, 14 Jan 2017 02:36:04 +0100 Subject: [PATCH 0136/1696] [NRKTV] Added NRKTVSeriesIE [NRKTV] Added season and episode number to metadata. [NRKTV] Added category to metadata. [NRKTV] Added tests to NRKTVSeries. [NRKTV] Fixed whitespace issues (flake8). --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nrk.py | 49 ++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2590b5e1b..06e6d4620 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -668,6 +668,7 @@ from .nrk import ( NRKTVIE, NRKTVDirekteIE, NRKTVEpisodesIE, + NRKTVSeriesIE, ) from .ntvde import NTVDeIE from .ntvru import NTVRuIE diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index ea7be005a..26604f84f 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -128,6 +128,18 @@ class NRKBaseIE(InfoExtractor): series = conviva.get('seriesName') or data.get('seriesTitle') episode = conviva.get('episodeName') or data.get('episodeNumberOrDate') + season_number = None + episode_number = None + if data.get('mediaElementType') == 'Episode': + _season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \ + data.get('relativeOriginUrl', '') + EPISODENUM_RE = [ + r'/s(?P<season>\d+)e(?P<episode>\d+)\.', + r'/sesong-(?P<season>\d+)/episode-(?P<episode>\d+)', + ] + season_number = int_or_none(self._search_regex(EPISODENUM_RE, _season_episode, "S##E##", fatal=False, group='season')) + episode_number = int_or_none(self._search_regex(EPISODENUM_RE, _season_episode, "S##E##", fatal=False, group='episode')) + thumbnails = None images = data.get('images') if images and isinstance(images, dict): @@ -140,11 +152,15 @@ class NRKBaseIE(InfoExtractor): } for image in web_images if image.get('imageUrl')] description = data.get('description') + category = data.get('mediaAnalytics', {}).get('category') common_info = { 'description': description, 'series': series, 'episode': episode, + 'season_number': season_number, + 
'episode_number': episode_number, + 'categories': [category] if category else None, 'age_limit': parse_age_limit(data.get('legalAge')), 'thumbnails': thumbnails, } @@ -360,6 +376,39 @@ class NRKTVEpisodesIE(NRKPlaylistBaseIE): r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False) +class NRKTVSeriesIE(InfoExtractor): + _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+)/?' + _ITEM_RE = r'data-season=["\'](?P<id>\d+)["\']' + _TESTS = [{ + 'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene', + 'playlist_count': 1, + }, { + 'url': 'https://tv.nrk.no/serie/saving-the-human-race', + 'playlist_count': 1, + }, { + 'url': 'https://tv.nrk.no/serie/postmann-pat', + 'playlist_count': 3, + }, { + 'url': 'https://tv.nrk.no/serie/groenn-glede', + 'playlist_count': 9, + }] + + def _real_extract(self, url): + series_id = self._match_id(url) + + webpage = self._download_webpage(url, series_id) + + entries = [ + self.url_result('https://tv.nrk.no/program/Episodes/{series}/{season}'.format( + series=series_id, + season=season_id + )) + for season_id in re.findall(self._ITEM_RE, webpage) + ] + + return self.playlist_result(entries) + + class NRKSkoleIE(InfoExtractor): IE_DESC = 'NRK Skole' _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)' From 7c5329e6f4152b48c5476b1b9b8ab931caa10331 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Feb 2017 00:29:29 +0700 Subject: [PATCH 0137/1696] [nrk] Improve extraction and update tests (closes #11571) --- youtube_dl/extractor/nrk.py | 145 +++++++++++++++++++++++++++--------- 1 file changed, 111 insertions(+), 34 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 26604f84f..fc3c0cd3c 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -134,11 +134,15 @@ class NRKBaseIE(InfoExtractor): _season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \ data.get('relativeOriginUrl', '') EPISODENUM_RE = [ - r'/s(?P<season>\d+)e(?P<episode>\d+)\.', - r'/sesong-(?P<season>\d+)/episode-(?P<episode>\d+)', + r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.', + r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})', ] - season_number = int_or_none(self._search_regex(EPISODENUM_RE, _season_episode, "S##E##", fatal=False, group='season')) - episode_number = int_or_none(self._search_regex(EPISODENUM_RE, _season_episode, "S##E##", fatal=False, group='episode')) + season_number = int_or_none(self._search_regex( + EPISODENUM_RE, _season_episode, 'season number', + default=None, group='season')) + episode_number = int_or_none(self._search_regex( + EPISODENUM_RE, _season_episode, 'episode number', + default=None, group='episode')) thumbnails = None images = data.get('images') @@ -243,54 +247,102 @@ class NRKTVIE(NRKBaseIE): 'title': '20 spørsmål 23.05.2014', 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', 'duration': 1741, + 'series': '20 spørsmål - TV', + 'episode': '23.05.2014', }, }, { 'url': 'https://tv.nrk.no/program/mdfp15000514', - 'md5': '43d0be26663d380603a9cf0c24366531', 'info_dict': { 'id': 'MDFP15000514CA', 'ext': 'mp4', 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014', 'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db', 'duration': 4605, + 'series': 'Kunnskapskanalen', + 'episode': '24.05.2014', + }, + 'params': { + 'skip_download': True, }, }, { # single playlist video 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', - 'md5': 
'adbd1dbd813edaf532b0a253780719c2', 'info_dict': { 'id': 'MSPO40010515-part2', 'ext': 'flv', 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', }, - 'skip': 'Only works from Norway', + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['Video is geo restricted'], + 'skip': 'particular part is not supported currently', }, { 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', 'playlist': [{ - 'md5': '9480285eff92d64f06e02a5367970a7a', 'info_dict': { - 'id': 'MSPO40010515-part1', - 'ext': 'flv', - 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', - 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', + 'id': 'MSPO40010515AH', + 'ext': 'mp4', + 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)', + 'description': 'md5:c03aba1e917561eface5214020551b7a', + 'duration': 772, + 'series': 'Tour de Ski', + 'episode': '06.01.2015', + }, + 'params': { + 'skip_download': True, }, }, { - 'md5': 'adbd1dbd813edaf532b0a253780719c2', 'info_dict': { - 'id': 'MSPO40010515-part2', - 'ext': 'flv', - 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', - 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', + 'id': 'MSPO40010515BH', + 'ext': 'mp4', + 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)', + 'description': 'md5:c03aba1e917561eface5214020551b7a', + 'duration': 6175, + 'series': 'Tour de Ski', + 'episode': '06.01.2015', + }, + 'params': { + 'skip_download': True, }, }], 'info_dict': { 'id': 'MSPO40010515', - 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', - 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'duration': 6947.52, + 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015', + 'description': 'md5:c03aba1e917561eface5214020551b7a', + }, + 'expected_warnings': ['Video is geo restricted'], + }, { + 'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13', + 'info_dict': { + 'id': 'KMTE50001317AA', + 'ext': 'mp4', + 'title': 'Anno 13:30', + 'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa', + 'duration': 2340, + 'series': 'Anno', + 'episode': '13:30', + 'season_number': 3, + 'episode_number': 13, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017', + 'info_dict': { + 'id': 'MUHH46000317AA', + 'ext': 'mp4', + 'title': 'Nytt på Nytt 27.01.2017', + 'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b', + 'duration': 1796, + 'series': 'Nytt på nytt', + 'episode': '27.01.2017', + }, + 'params': { + 'skip_download': True, }, - 'skip': 'Only works from Norway', }, { 'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#', 'only_matching': True, @@ -377,36 +429,61 @@ class NRKTVEpisodesIE(NRKPlaylistBaseIE): class NRKTVSeriesIE(InfoExtractor): - _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+)/?' 
- _ITEM_RE = r'data-season=["\'](?P<id>\d+)["\']' + _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)' + _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)' _TESTS = [{ + 'url': 'https://tv.nrk.no/serie/groenn-glede', + 'info_dict': { + 'id': 'groenn-glede', + 'title': 'Grønn glede', + 'description': 'md5:7576e92ae7f65da6993cf90ee29e4608', + }, + 'playlist_mincount': 9, + }, { + 'url': 'http://tv.nrksuper.no/serie/labyrint', + 'info_dict': { + 'id': 'labyrint', + 'title': 'Labyrint', + 'description': 'md5:58afd450974c89e27d5a19212eee7115', + }, + 'playlist_mincount': 3, + }, { 'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene', - 'playlist_count': 1, + 'only_matching': True, }, { 'url': 'https://tv.nrk.no/serie/saving-the-human-race', - 'playlist_count': 1, + 'only_matching': True, }, { 'url': 'https://tv.nrk.no/serie/postmann-pat', - 'playlist_count': 3, - }, { - 'url': 'https://tv.nrk.no/serie/groenn-glede', - 'playlist_count': 9, + 'only_matching': True, }] + @classmethod + def suitable(cls, url): + return False if NRKTVIE.suitable(url) else super(NRKTVSeriesIE, cls).suitable(url) + def _real_extract(self, url): series_id = self._match_id(url) webpage = self._download_webpage(url, series_id) entries = [ - self.url_result('https://tv.nrk.no/program/Episodes/{series}/{season}'.format( - series=series_id, - season=season_id - )) + self.url_result( + 'https://tv.nrk.no/program/Episodes/{series}/{season}'.format( + series=series_id, season=season_id)) for season_id in re.findall(self._ITEM_RE, webpage) ] - return self.playlist_result(entries) + title = self._html_search_meta( + 'seriestitle', webpage, + 'title', default=None) or self._og_search_title( + webpage, fatal=False) + + description = self._html_search_meta( + 'series_description', webpage, + 'description', default=None) or self._og_search_description(webpage) + + return self.playlist_result(entries, series_id, title, description) class NRKSkoleIE(InfoExtractor): From 363245ad94dfdf0c34b4c2c801e7cf6cea74f39c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Feb 2017 00:30:19 +0700 Subject: [PATCH 0138/1696] Credit @oddstr13 for nrk:series (#11571) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 3ef2800c9..f2875d504 100644 --- a/AUTHORS +++ b/AUTHORS @@ -200,3 +200,4 @@ Paul Hartmann Stephen Chen Fabian Stahl Bagira +Odd Stråbø From c38a67bcd5df639b9d7e7faa8685e76446803527 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Feb 2017 00:49:28 +0700 Subject: [PATCH 0139/1696] [vimeo] Extract license (closes #11880) --- youtube_dl/extractor/vimeo.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 8b6a5cc3c..32179e915 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -218,6 +218,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_id': 'user7108434', 'uploader': 'Filippo Valsorda', 'duration': 10, + 'license': 'by-sa', }, }, { @@ -486,6 +487,8 @@ class VimeoIE(VimeoBaseInfoExtractor): '%s said: %s' % (self.IE_NAME, seed_status['title']), expected=True) + cc_license = None + # Extract the config JSON try: try: @@ -499,8 +502,9 @@ class VimeoIE(VimeoBaseInfoExtractor): vimeo_clip_page_config = self._search_regex( r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage, 'vimeo clip page config') - config_url = self._parse_json( - 
vimeo_clip_page_config, video_id)['player']['config_url'] + page_config = self._parse_json(vimeo_clip_page_config, video_id) + config_url = page_config['player']['config_url'] + cc_license = page_config.get('cc_license') config_json = self._download_webpage(config_url, video_id) config = json.loads(config_json) except RegexNotFoundError: @@ -609,6 +613,12 @@ class VimeoIE(VimeoBaseInfoExtractor): info_dict = self._parse_config(config, video_id) formats.extend(info_dict['formats']) self._vimeo_sort_formats(formats) + + if not cc_license: + cc_license = self._search_regex( + r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1', + webpage, 'license', default=None, group='license') + info_dict.update({ 'id': video_id, 'formats': formats, @@ -618,6 +628,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'view_count': view_count, 'like_count': like_count, 'comment_count': comment_count, + 'license': cc_license, }) return info_dict From c15cd296404e164b72fd7f2666d5875f35057d93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Feb 2017 00:58:02 +0700 Subject: [PATCH 0140/1696] [vimeo] Extract upload timestamp --- youtube_dl/extractor/vimeo.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 32179e915..8ba222224 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -21,7 +21,9 @@ from ..utils import ( sanitized_Request, smuggle_url, std_headers, + try_get, unified_strdate, + unified_timestamp, unsmuggle_url, urlencode_postdata, unescapeHTML, @@ -213,6 +215,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'ext': 'mp4', 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", 'description': 'md5:2d3305bad981a06ff79f027f19865021', + 'timestamp': 1355990239, 'upload_date': '20121220', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434', 'uploader_id': 'user7108434', @@ -259,6 +262,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'id': '68375962', 'ext': 'mp4', 'title': 'youtube-dl password protected test video', + 'timestamp': 1371200155, 'upload_date': '20130614', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128', 'uploader_id': 'user18948128', @@ -281,7 +285,8 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio', 'uploader_id': 'atencio', 'uploader': 'Peter Atencio', - 'upload_date': '20130927', + 'timestamp': 1380339469, + 'upload_date': '20130928', 'duration': 187, }, }, @@ -293,6 +298,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'ext': 'mp4', 'title': 'The New Vimeo Player (You Know, For Videos)', 'description': 'md5:2ec900bf97c3f389378a96aee11260ea', + 'timestamp': 1381846109, 'upload_date': '20131015', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff', 'uploader_id': 'staff', @@ -324,6 +330,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader': 'The DMCI', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci', 'uploader_id': 'dmci', + 'timestamp': 1324343742, 'upload_date': '20111220', 'description': 'md5:ae23671e82d05415868f7ad1aec21147', }, @@ -339,6 +346,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader': 'Casey Donahue', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue', 'uploader_id': 'caseydonahue', + 'timestamp': 1250886430, 'upload_date': '20090821', 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6', }, @@ -488,6 +496,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 
expected=True) cc_license = None + timestamp = None # Extract the config JSON try: @@ -505,6 +514,9 @@ class VimeoIE(VimeoBaseInfoExtractor): page_config = self._parse_json(vimeo_clip_page_config, video_id) config_url = page_config['player']['config_url'] cc_license = page_config.get('cc_license') + timestamp = try_get( + page_config, lambda x: x['clip']['uploaded_on'], + compat_str) config_json = self._download_webpage(config_url, video_id) config = json.loads(config_json) except RegexNotFoundError: @@ -573,10 +585,10 @@ class VimeoIE(VimeoBaseInfoExtractor): self._downloader.report_warning('Cannot find video description') # Extract upload date - video_upload_date = None - mobj = re.search(r'<time[^>]+datetime="([^"]+)"', webpage) - if mobj is not None: - video_upload_date = unified_strdate(mobj.group(1)) + if not timestamp: + timestamp = self._search_regex( + r'<time[^>]+datetime="([^"]+)"', webpage, + 'timestamp', default=None) try: view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count')) @@ -622,7 +634,7 @@ class VimeoIE(VimeoBaseInfoExtractor): info_dict.update({ 'id': video_id, 'formats': formats, - 'upload_date': video_upload_date, + 'timestamp': unified_timestamp(timestamp), 'description': video_description, 'webpage_url': url, 'view_count': view_count, From 26c0f09935d51cc8837230ad48db08acd3744dd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Feb 2017 02:15:52 +0700 Subject: [PATCH 0141/1696] [vimeo] PEP 8 --- youtube_dl/extractor/vimeo.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 8ba222224..61cc469bf 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -22,13 +22,11 @@ from ..utils import ( smuggle_url, std_headers, try_get, - unified_strdate, unified_timestamp, unsmuggle_url, urlencode_postdata, unescapeHTML, parse_filesize, - try_get, ) From 2b2d5d319b563a12e26c55966a047fa5bb039cd0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 1 Feb 2017 16:39:32 +0800 Subject: [PATCH 0142/1696] [crunchyroll] Remove ScaledBorderAndShadow settings See https://github.com/rg3/youtube-dl/pull/9028, especially @lachs0r's comments for the reason behind this change --- youtube_dl/extractor/crunchyroll.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index f811c7f33..109d1c5a8 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -255,8 +255,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style'] output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x'] output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y'] - output += """ScaledBorderAndShadow: no - + output += """ [V4+ Styles] Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding """ From 7882f1115e8eca2d2c958e2dbb6be45450e4027c Mon Sep 17 00:00:00 2001 From: Thomas Christlieb <thomaschristlieb@hotmail.com> Date: Wed, 1 Feb 2017 16:00:41 +0100 Subject: [PATCH 0143/1696] Added new Regex for prosiebensat1 Extractor Description. Fixes #11810 (#11929) * Added new Regex for prosiebensat1 Extractor Description. 
Fixes #11810 * Using _og_search_description() as a Fallback for Description-Regex * Using _og_search_description() as a Fallback for Description-Regex - Second try * Also added fallback regex * Using _og_search_description() as a Fallback for Description-Regex - Third try * removed fatal=False from search for description regex. default=None should be preferred only * Using fatal=false for _og_search_description * Revert "Using fatal=false for _og_search_description" This reverts commit 2b7e123f9d0f2bd6ada54fa8e4e6035fece5dbf4. * Deleted default=None Parameter for _og_search_property --- youtube_dl/extractor/prosiebensat1.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 03e1b1f7f..6856bacaf 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -375,7 +375,9 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title') info = self._extract_video_info(url, clip_id) description = self._html_search_regex( - self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False) + self._DESCRIPTION_REGEXES, webpage, 'description', default=None) + if description is None: + description = self._og_search_description(webpage) thumbnail = self._og_search_thumbnail(webpage) upload_date = unified_strdate(self._html_search_regex( self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None)) From fe5aa197b58be1bbf88a152be0e84f24f1711bd7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 1 Feb 2017 23:13:45 +0800 Subject: [PATCH 0144/1696] [prosiebensat1] PEP8 and update _TESTS --- youtube_dl/extractor/prosiebensat1.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 6856bacaf..5091d8456 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -147,16 +147,12 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge', 'info_dict': { 'id': '2104602', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Episode 18 - Staffel 2', 'description': 'md5:8733c81b702ea472e069bc48bb658fc1', 'upload_date': '20131231', 'duration': 5845.04, }, - 'params': { - # rtmp download - 'skip_download': True, - }, }, { 'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html', @@ -258,7 +254,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip', 'info_dict': { 'id': '2572814', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Andreas Kümmert: Rocket Man', 'description': 'md5:6ddb02b0781c6adf778afea606652e38', 'upload_date': '20131017', @@ -272,7 +268,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html', 'info_dict': { 'id': '2156342', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Kurztrips zum Valentinstag', 'description': 'Romantischer Kurztrip zum Valentinstag? 
Nina Heinemann verrät, was sich hier wirklich lohnt.', 'duration': 307.24, @@ -289,12 +285,13 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'description': 'md5:63b8963e71f481782aeea877658dec84', }, 'playlist_count': 2, + 'skip': 'This video is unavailable', }, { 'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge', 'info_dict': { 'id': '4187506', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Best of Circus HalliGalli', 'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9', 'upload_date': '20151229', @@ -376,7 +373,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): info = self._extract_video_info(url, clip_id) description = self._html_search_regex( self._DESCRIPTION_REGEXES, webpage, 'description', default=None) - if description is None: + if description is None: description = self._og_search_description(webpage) thumbnail = self._og_search_thumbnail(webpage) upload_date = unified_strdate(self._html_search_regex( From 000f207944e277e63dbec5a60007c30e3187d3fd Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 1 Feb 2017 23:16:35 +0800 Subject: [PATCH 0145/1696] [prosiebensat1] Update ChangeLog --- ChangeLog | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ChangeLog b/ChangeLog index d5ab0e0a7..da5b75b47 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +version <unreleased> + +Extractors +* [prosiebensat1] Fix extraction of descriptions (#11810, #11929) + version 2017.01.31 Core From b83ef507b457e6ea8c52265ea42b6c5d2c500a7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Feb 2017 23:15:38 +0700 Subject: [PATCH 0146/1696] [facebook] Fix extraction (closes #11926) --- youtube_dl/extractor/facebook.py | 36 ++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index c0a7fc7d8..47bcc0dbc 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -12,14 +12,16 @@ from ..compat import ( compat_urllib_parse_unquote_plus, ) from ..utils import ( + clean_html, error_to_compat_str, ExtractorError, + get_element_by_id, int_or_none, + js_to_json, limit_length, sanitized_Request, + try_get, urlencode_postdata, - get_element_by_id, - clean_html, ) @@ -243,14 +245,30 @@ class FacebookIE(InfoExtractor): video_data = None + def extract_video_data(instances): + for item in instances: + if item[1][0] == 'VideoConfig': + video_item = item[2][0] + if video_item.get('video_id') == video_id: + return video_item['videoData'] + server_js_data = self._parse_json(self._search_regex( - r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id) - for item in server_js_data.get('instances', []): - if item[1][0] == 'VideoConfig': - video_item = item[2][0] - if video_item.get('video_id') == video_id: - video_data = video_item['videoData'] - break + r'handleServerJS\(({.+})(?:\);|,")', webpage, + 'server js data', default='{}'), video_id, fatal=False) + + if server_js_data: + video_data = extract_video_data(server_js_data.get('instances', [])) + + if not video_data: + server_js_data = self._parse_json( + self._search_regex( + r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+stream_pagelet', + webpage, 'js data', default='{}'), + video_id, transform_source=js_to_json, fatal=False) + if server_js_data: + video_data = extract_video_data(try_get( + server_js_data, lambda x: x['jsmods']['instances'], + list) or []) if not 
video_data: if not fatal_if_no_video: From b996b8809285c2c8526dfe96f5ea9835ea799fe9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Feb 2017 23:29:59 +0700 Subject: [PATCH 0147/1696] [ChangeLog] Actualize --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index da5b75b47..d24169af8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,13 @@ version <unreleased> Extractors ++ [facebook] Add another fallback extraction scenario (#11926) * [prosiebensat1] Fix extraction of descriptions (#11810, #11929) +- [crunchyroll] Remove ScaledBorderAndShadow settings (#9028) ++ [vimeo] Extract upload timestamp ++ [vimeo] Extract license (#8726, #11880) ++ [nrk:series] Add support for series (#11571, #11711) + version 2017.01.31 From 50695949937bf399b611ef7957f44aac9fbee9dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Feb 2017 03:20:09 +0700 Subject: [PATCH 0148/1696] release 2017.02.01 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 180013f72..8914569b6 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.31*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.31** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.01** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.01.31 +[debug] youtube-dl version 2017.02.01 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index d24169af8..c1e8f643a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.01 Extractors + [facebook] Add another fallback extraction scenario (#11926) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d4231577b..d900f5e12 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -528,6 +528,7 @@ - **NRKTV**: NRK TV and NRK Radio - **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte - **NRKTVEpisodes** + - **NRKTVSeries** - **ntv.ru** - **Nuvid** - **NYTimes** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index fee0ac7c5..0f9b6b703 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.01.31' +__version__ = '2017.02.01' From da162c1135febbb653a302b598dba2d24ac4e24e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 1 Feb 2017 20:15:25 +0100 Subject: [PATCH 0149/1696] [compat] add compat_etree_register_namespace to __all__ list --- youtube_dl/compat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 49e3c90e2..718902019 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2883,6 +2883,7 @@ __all__ = [ 'compat_cookiejar', 'compat_cookies', 'compat_etree_fromstring', + 'compat_etree_register_namespace', 'compat_expanduser', 'compat_get_terminal_size', 'compat_getenv', From 020c5df52d61af0630be8c982282e110a83fc8df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 1 Feb 2017 23:48:34 +0100 Subject: [PATCH 0150/1696] [elpais] Fix extraction for some URLs (closes #11765) --- ChangeLog | 1 + youtube_dl/extractor/elpais.py | 23 ++++++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index c1e8f643a..8e3a04d7d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,6 +7,7 @@ Extractors + [vimeo] Extract upload timestamp + [vimeo] Extract license (#8726, #11880) + [nrk:series] Add support for series (#11571, #11711) ++ [elpais] Fix extraction for some URLs (#11765) version 2017.01.31 diff --git a/youtube_dl/extractor/elpais.py b/youtube_dl/extractor/elpais.py index 8c725a4e6..99e00cf3c 100644 --- a/youtube_dl/extractor/elpais.py +++ b/youtube_dl/extractor/elpais.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import strip_jsonp, unified_strdate class ElPaisIE(InfoExtractor): @@ -29,6 +29,16 @@ class ElPaisIE(InfoExtractor): 'description': 'Que sí, que las cápsulas 
son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.', 'upload_date': '20160303', } + }, { + 'url': 'http://elpais.com/elpais/2017/01/26/ciencia/1485456786_417876.html', + 'md5': '9c79923a118a067e1a45789e1e0b0f9c', + 'info_dict': { + 'id': '1485456786_417876', + 'ext': 'mp4', + 'title': 'Hallado un barco de la antigua Roma que naufragó en Baleares hace 1.800 años', + 'description': 'La nave portaba cientos de ánforas y se hundió cerca de la isla de Cabrera por razones desconocidas', + 'upload_date': '20170127', + }, }] def _real_extract(self, url): @@ -37,8 +47,15 @@ class ElPaisIE(InfoExtractor): prefix = self._html_search_regex( r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix') - video_suffix = self._search_regex( - r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL') + id_multimedia = self._search_regex( + r"id_multimedia\s*=\s*'([^']+)'", webpage, 'ID multimedia', default=None) + if id_multimedia: + url_info = self._download_json( + 'http://elpais.com/vdpep/1/?pepid=' + id_multimedia, video_id, transform_source=strip_jsonp) + video_suffix = url_info['mp4'] + else: + video_suffix = self._search_regex( + r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL') video_url = prefix + video_suffix thumbnail_suffix = self._search_regex( r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", From 8bdc149441a86e01c56946090087c005a525260e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 2 Feb 2017 08:05:16 +0100 Subject: [PATCH 0151/1696] [downloader/external:ffmpeg] minimize the use of aac_adtstoasc filter --- youtube_dl/downloader/external.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 5d3e5d8d3..138f353ef 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -17,6 +17,7 @@ from ..utils import ( encodeArgument, handle_youtubedl_headers, check_executable, + is_outdated_version, ) @@ -264,7 +265,9 @@ class FFmpegFD(ExternalFD): if self.params.get('hls_use_mpegts', False) or tmpfilename == '-': args += ['-f', 'mpegts'] else: - args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc'] + args += ['-f', 'mp4'] + if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): + args += ['-bsf:a', 'aac_adtstoasc'] elif protocol == 'rtmp': args += ['-f', 'flv'] else: From 81aeafeb44a16b341e47c3bb85d288252a095eda Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 2 Feb 2017 08:07:06 +0100 Subject: [PATCH 0152/1696] [cbc:watch] extract audio codec for audion only formats(fixes #11893) --- youtube_dl/extractor/cbc.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index a291685bf..cf678e7f8 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -296,6 +296,12 @@ class CBCWatchVideoIE(CBCWatchBaseIE): formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False) if len(formats) < 2: formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') + for f in formats: + format_id = f.get('format_id') + if format_id.startswith('AAC'): + 
f['acodec'] = 'aac' + elif format_id.startswith('AC3'): + f['acodec'] = 'ac-3' self._sort_formats(formats) info = { From bd8f48c78b952ebe3bf335185c819e265f63cb50 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 2 Feb 2017 21:51:31 +0800 Subject: [PATCH 0153/1696] [bilibili] Support new Bangumi URLs (closes #11845) To reduce complexity, I don't support old Bangumi URLs directly via _VALID_URL. Instead, I choose to let it go to generic redirection. An example can be found in #10190: http://bangumi.bilibili.com/anime/v/40062 --- ChangeLog | 5 ++ youtube_dl/extractor/bilibili.py | 135 ++++++++++++++++++++++++++--- youtube_dl/extractor/extractors.py | 5 +- 3 files changed, 134 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8e3a04d7d..c27907f51 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +version <unreleased> + +Extractors ++ [bilibili] Support new Bangumi URLs (#11845) + version 2017.02.01 Extractors diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 85ea5e6ee..80dd8382e 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -5,19 +5,27 @@ import hashlib import re from .common import InfoExtractor -from ..compat import compat_parse_qs +from ..compat import ( + compat_parse_qs, + compat_urlparse, +) from ..utils import ( + ExtractorError, int_or_none, float_or_none, + parse_iso8601, + smuggle_url, + strip_jsonp, unified_timestamp, + unsmuggle_url, urlencode_postdata, ) class BiliBiliIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', 'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e', 'info_dict': { @@ -32,25 +40,61 @@ class BiliBiliIE(InfoExtractor): 'uploader': '菊子桑', 'uploader_id': '156160', }, - } + }, { + # Tested in BiliBiliBangumiIE + 'url': 'http://bangumi.bilibili.com/anime/1869/play#40062', + 'only_matching': True, + }, { + 'url': 'http://bangumi.bilibili.com/anime/5802/play#100643', + 'md5': '3f721ad1e75030cc06faf73587cfec57', + 'info_dict': { + 'id': '100643', + 'ext': 'mp4', + 'title': 'CHAOS;CHILD', + 'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...', + }, + 'skip': 'Geo-restricted to China', + }] _APP_KEY = '84956560bc028eb7' _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e' + def _report_error(self, result): + if 'message' in result: + raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True) + elif 'code' in result: + raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True) + else: + raise ExtractorError('Can\'t extract Bangumi episode ID') + def _real_extract(self, url): - video_id = self._match_id(url) + url, smuggled_data = unsmuggle_url(url, {}) + + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + anime_id = mobj.group('anime_id') webpage = self._download_webpage(url, video_id) - if 'anime/v' not in url: + if 'anime/' not in url: cid = compat_parse_qs(self._search_regex( [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], webpage, 'player parameters'))['cid'][0] else: + if 'no_bangumi_tip' not in smuggled_data: + self.to_screen('Downloading episode %s. 
To download all videos in anime %s, re-run youtube-dl with %s' % ( + video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id))) + headers = { + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + } + headers.update(self.geo_verification_headers()) + js = self._download_json( 'http://bangumi.bilibili.com/web_api/get_source', video_id, data=urlencode_postdata({'episode_id': video_id}), - headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'}) + headers=headers) + if 'result' not in js: + self._report_error(js) cid = js['result']['cid'] payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid) @@ -58,7 +102,11 @@ class BiliBiliIE(InfoExtractor): video_info = self._download_json( 'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign), - video_id, note='Downloading video info page') + video_id, note='Downloading video info page', + headers=self.geo_verification_headers()) + + if 'durl' not in video_info: + self._report_error(video_info) entries = [] @@ -85,7 +133,7 @@ class BiliBiliIE(InfoExtractor): title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title') description = self._html_search_meta('description', webpage) timestamp = unified_timestamp(self._html_search_regex( - r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)) + r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None)) thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage) # TODO 'view_count' requires deobfuscating Javascript @@ -99,7 +147,7 @@ class BiliBiliIE(InfoExtractor): } uploader_mobj = re.search( - r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"', + r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"', webpage) if uploader_mobj: info.update({ @@ -123,3 +171,70 @@ class BiliBiliIE(InfoExtractor): 'description': description, 'entries': entries, } + + +class BiliBiliBangumiIE(InfoExtractor): + _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)' + + IE_NAME = 'bangumi.bilibili.com' + IE_DESC = 'BiliBili番剧' + + _TESTS = [{ + 'url': 'http://bangumi.bilibili.com/anime/1869', + 'info_dict': { + 'id': '1869', + 'title': '混沌武士', + 'description': 'md5:6a9622b911565794c11f25f81d6a97d2', + }, + 'playlist_count': 26, + }, { + 'url': 'http://bangumi.bilibili.com/anime/1869', + 'info_dict': { + 'id': '1869', + 'title': '混沌武士', + 'description': 'md5:6a9622b911565794c11f25f81d6a97d2', + }, + 'playlist': [{ + 'md5': '91da8621454dd58316851c27c68b0c13', + 'info_dict': { + 'id': '40062', + 'ext': 'mp4', + 'title': '混沌武士', + 'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...', + 'timestamp': 1414538739, + 'upload_date': '20141028', + 'episode': '疾风怒涛 Tempestuous Temperaments', + 'episode_number': 1, + }, + }], + 'params': { + 'playlist_items': '1', + }, + }] + + @classmethod + def suitable(cls, url): + return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url) + + def _real_extract(self, url): + bangumi_id = self._match_id(url) + + # Sometimes this API returns a JSONP response + season_info = self._download_json( + 'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id, + bangumi_id, transform_source=strip_jsonp)['result'] + + entries = [{ + '_type': 'url_transparent', + 'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}), + 'ie_key': 
BiliBiliIE.ie_key(), + 'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '), + 'episode': episode.get('index_title'), + 'episode_number': int_or_none(episode.get('index')), + } for episode in season_info['episodes']] + + entries = sorted(entries, key=lambda entry: entry.get('episode_number')) + + return self.playlist_result( + entries, bangumi_id, + season_info.get('bangumi_title'), season_info.get('evaluate')) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 06e6d4620..1d1c05d42 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -103,7 +103,10 @@ from .beatport import BeatportIE from .bet import BetIE from .bigflix import BigflixIE from .bild import BildIE -from .bilibili import BiliBiliIE +from .bilibili import ( + BiliBiliIE, + BiliBiliBangumiIE, +) from .biobiochiletv import BioBioChileTVIE from .biqle import BIQLEIE from .bleacherreport import ( From a685751051f277b8ce99ee0949420bca4ea28c28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 2 Feb 2017 22:01:11 +0700 Subject: [PATCH 0154/1696] [youtube:playlist] Recognize TL playlists (closes #11945) --- youtube_dl/extractor/youtube.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ea398bcc8..0e67fdd12 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1857,13 +1857,13 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist= ) ( - (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,} + (?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,} # Top tracks, they can also include dots |(?:MC)[\w\.]* ) .* | - ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,}) + ((?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}) )""" _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true' _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?' 
@@ -1985,6 +1985,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): }, { 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21', 'only_matching': True, + }, { + 'url': 'TLGGrESM50VT6acwMjAyMjAxNw', + 'only_matching': True, }] def _real_initialize(self): From 5a116e13020813f9f1d952504455043986c28b9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 2 Feb 2017 22:45:18 +0700 Subject: [PATCH 0155/1696] [facebook] Fix title extraction (closes #11941) --- youtube_dl/extractor/facebook.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 47bcc0dbc..b325c8200 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -73,7 +73,7 @@ class FacebookIE(InfoExtractor): 'info_dict': { 'id': '274175099429670', 'ext': 'mp4', - 'title': 'Facebook video #274175099429670', + 'title': 'Asif Nawab Butt posted a video to his Timeline.', 'uploader': 'Asif Nawab Butt', 'upload_date': '20140506', 'timestamp': 1399398998, @@ -318,10 +318,16 @@ class FacebookIE(InfoExtractor): video_title = self._html_search_regex( r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>', webpage, 'alternative title', default=None) - video_title = limit_length(video_title, 80) if not video_title: + video_title = self._html_search_meta( + 'description', webpage, 'title') + if video_title: + video_title = limit_length(video_title, 80) + else: video_title = 'Facebook video #%s' % video_id - uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage)) + uploader = clean_html(get_element_by_id( + 'fbPhotoPageAuthorName', webpage)) or self._search_regex( + r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', fatal=False) timestamp = int_or_none(self._search_regex( r'<abbr[^>]+data-utime=["\'](\d+)', webpage, 'timestamp', default=None)) From c54c01f82dba6d3e982c73c81ad71c49f31d8af1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 2 Feb 2017 23:03:38 +0700 Subject: [PATCH 0156/1696] [go] Relax video id regex (closes #11937) --- youtube_dl/extractor/go.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index c7776b186..a34779b16 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -43,7 +43,10 @@ class GoIE(InfoExtractor): sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups() if not video_id: webpage = self._download_webpage(url, display_id) - video_id = self._search_regex(r'data-video-id=["\']VDKA(\w+)', webpage, 'video id') + video_id = self._search_regex( + # There may be inner quotes, e.g. data-video-id="'VDKA3609139'" + # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood + r'data-video-id=["\']*VDKA(\w+)', webpage, 'video id') brand = self._BRANDS[sub_domain] video_data = self._download_json( 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id), From a22b2fd19bd8c08d50f884d1903486d4f00f76ec Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 3 Feb 2017 01:28:24 +0800 Subject: [PATCH 0157/1696] [youtube] Fix ytsearch* when cookies are provided Closes #11924 The API with `page` is no longer used in browsers, and YouTube always returns {'reload': 'now'} when cookies are provided. 
See http://youtube.github.io/spfjs/documentation/start/ for how SPF works. Basically appending static link with a `spf` parameter yields the corresponding dynamic link. --- ChangeLog | 1 + youtube_dl/extractor/youtube.py | 22 ++++++++++++++-------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index c27907f51..c80126cfb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [youtube] Fix ytsearch when cookies are provided (#11924) + [bilibili] Support new Bangumi URLs (#11845) version 2017.02.01 diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0e67fdd12..f2f751104 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2348,18 +2348,18 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): videos = [] limit = n + url_query = { + 'search_query': query.encode('utf-8'), + } + url_query.update(self._EXTRA_QUERY_ARGS) + result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query) + for pagenum in itertools.count(1): - url_query = { - 'search_query': query.encode('utf-8'), - 'page': pagenum, - 'spf': 'navigate', - } - url_query.update(self._EXTRA_QUERY_ARGS) - result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query) data = self._download_json( result_url, video_id='query "%s"' % query, note='Downloading page %s' % pagenum, - errnote='Unable to download API page') + errnote='Unable to download API page', + query={'spf': 'navigate'}) html_content = data[1]['body']['content'] if 'class="search-message' in html_content: @@ -2371,6 +2371,12 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): videos += new_videos if not new_videos or len(videos) > limit: break + next_link = self._html_search_regex( + r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next', + html_content, 'next link', default=None) + if next_link is None: + break + result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link) if len(videos) > n: videos = videos[:n] From b3ee552e4b918fb720111b23147e24fa5475a74b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= <michal@cihar.com> Date: Tue, 31 Jan 2017 07:54:53 +0100 Subject: [PATCH 0158/1696] [utils] Handle single-line comments in js_to_json --- test/test_utils.py | 3 +++ youtube_dl/utils.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index a74d59f34..954bb7d8b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -791,6 +791,9 @@ class TestUtil(unittest.TestCase): on = js_to_json('{ 0: /* " \n */ ",]" , }') self.assertEqual(json.loads(on), {'0': ',]'}) + on = js_to_json('{ 0: // comment\n1 }') + self.assertEqual(json.loads(on), {'0': 1}) + on = js_to_json(r'["<p>x<\/p>"]') self.assertEqual(json.loads(on), ['<p>x</p>']) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index cf46711b9..6c462625b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2107,7 +2107,7 @@ def js_to_json(code): v = m.group(0) if v in ('true', 'false', 'null'): return v - elif v.startswith('/*') or v == ',': + elif v.startswith('/*') or v.startswith('//') or v == ',': return "" if v[0] in ("'", '"'): @@ -2134,7 +2134,7 @@ def js_to_json(code): return re.sub(r'''(?sx) "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| - /\*.*?\*/|,(?=\s*[\]}])| + 
/\*.*?\*/|//[^\n]*|,(?=\s*[\]}])| [a-zA-Z_][.a-zA-Z_0-9]*| \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| [0-9]+(?=\s*:) From 0bbcc8a10a4bd339540bf149dd263419fd8b6e66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= <michal@cihar.com> Date: Tue, 31 Jan 2017 07:59:55 +0100 Subject: [PATCH 0159/1696] [iprima] Fix extraction (closes #11920, closes #11896) --- youtube_dl/extractor/iprima.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index da2cdc656..0fe576883 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -65,7 +65,7 @@ class IPrimaIE(InfoExtractor): options = self._parse_json( self._search_regex( - r'(?s)var\s+playerOptions\s*=\s*({.+?});', + r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]', playerpage, 'player options', default='{}'), video_id, transform_source=js_to_json, fatal=False) if options: From 4195096ea8da8237a63e1ba3876dc8856b8605c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Feb 2017 02:55:06 +0700 Subject: [PATCH 0160/1696] [utils] Improve comments processing in js_to_json (closes #11947) --- test/test_utils.py | 24 ++++++++++++++++++++++++ youtube_dl/utils.py | 20 +++++++++++--------- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 954bb7d8b..edc712f07 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -785,12 +785,24 @@ class TestUtil(unittest.TestCase): on = js_to_json('["abc", "def",]') self.assertEqual(json.loads(on), ['abc', 'def']) + on = js_to_json('[/*comment\n*/"abc"/*comment\n*/,/*comment\n*/"def",/*comment\n*/]') + self.assertEqual(json.loads(on), ['abc', 'def']) + + on = js_to_json('[//comment\n"abc" //comment\n,//comment\n"def",//comment\n]') + self.assertEqual(json.loads(on), ['abc', 'def']) + on = js_to_json('{"abc": "def",}') self.assertEqual(json.loads(on), {'abc': 'def'}) + on = js_to_json('{/*comment\n*/"abc"/*comment\n*/:/*comment\n*/"def"/*comment\n*/,/*comment\n*/}') + self.assertEqual(json.loads(on), {'abc': 'def'}) + on = js_to_json('{ 0: /* " \n */ ",]" , }') self.assertEqual(json.loads(on), {'0': ',]'}) + on = js_to_json('{ /*comment\n*/0/*comment\n*/: /* " \n */ ",]" , }') + self.assertEqual(json.loads(on), {'0': ',]'}) + on = js_to_json('{ 0: // comment\n1 }') self.assertEqual(json.loads(on), {'0': 1}) @@ -803,15 +815,27 @@ class TestUtil(unittest.TestCase): on = js_to_json("['a\\\nb']") self.assertEqual(json.loads(on), ['ab']) + on = js_to_json("/*comment\n*/[/*comment\n*/'a\\\nb'/*comment\n*/]/*comment\n*/") + self.assertEqual(json.loads(on), ['ab']) + on = js_to_json('{0xff:0xff}') self.assertEqual(json.loads(on), {'255': 255}) + on = js_to_json('{/*comment\n*/0xff/*comment\n*/:/*comment\n*/0xff/*comment\n*/}') + self.assertEqual(json.loads(on), {'255': 255}) + on = js_to_json('{077:077}') self.assertEqual(json.loads(on), {'63': 63}) + on = js_to_json('{/*comment\n*/077/*comment\n*/:/*comment\n*/077/*comment\n*/}') + self.assertEqual(json.loads(on), {'63': 63}) + on = js_to_json('{42:42}') self.assertEqual(json.loads(on), {'42': 42}) + on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}') + self.assertEqual(json.loads(on), {'42': 42}) + def test_extract_attributes(self): self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'}) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 
6c462625b..67a847eba 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2103,6 +2103,13 @@ def strip_jsonp(code): def js_to_json(code): + COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*' + SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE) + INTEGER_TABLE = ( + (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16), + (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8), + ) + def fix_kv(m): v = m.group(0) if v in ('true', 'false', 'null'): @@ -2118,11 +2125,6 @@ def js_to_json(code): '\\x': '\\u00', }.get(m.group(0), m.group(0)), v[1:-1]) - INTEGER_TABLE = ( - (r'^(0[xX][0-9a-fA-F]+)\s*:?$', 16), - (r'^(0+[0-7]+)\s*:?$', 8), - ) - for regex, base in INTEGER_TABLE: im = re.match(regex, v) if im: @@ -2134,11 +2136,11 @@ def js_to_json(code): return re.sub(r'''(?sx) "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| - /\*.*?\*/|//[^\n]*|,(?=\s*[\]}])| + {comment}|,(?={skip}[\]}}])| [a-zA-Z_][.a-zA-Z_0-9]*| - \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| - [0-9]+(?=\s*:) - ''', fix_kv, code) + \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?| + [0-9]+(?={skip}:) + '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code) def qualities(quality_ids): From 33da98f4933ddc54c944bae985cfcc7b53563208 Mon Sep 17 00:00:00 2001 From: Justsoos <justso@gmail.com> Date: Wed, 1 Feb 2017 21:30:01 +0800 Subject: [PATCH 0161/1696] [douyutv] Improve room id regex http://www.douyu.com/t/lpl source get extra '\' with "room_id\" (from js coding) --- youtube_dl/extractor/douyutv.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index 2f3c5113e..911594413 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -18,7 +18,7 @@ from ..utils import ( class DouyuTVIE(InfoExtractor): IE_DESC = '斗鱼' - _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?P<id>[A-Za-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?:[^/]+/)*(?P<id>[A-Za-z0-9]+)' _TESTS = [{ 'url': 'http://www.douyutv.com/iseven', 'info_dict': { @@ -68,6 +68,10 @@ class DouyuTVIE(InfoExtractor): }, { 'url': 'http://www.douyu.com/xiaocang', 'only_matching': True, + }, { + # \"room_id\" + 'url': 'http://www.douyu.com/t/lpl', + 'only_matching': True, }] # Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf @@ -82,7 +86,7 @@ class DouyuTVIE(InfoExtractor): else: page = self._download_webpage(url, video_id) room_id = self._html_search_regex( - r'"room_id"\s*:\s*(\d+),', page, 'room id') + r'"room_id\\?"\s*:\s*(\d+),', page, 'room id') room = self._download_json( 'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id, From 45024183aea169dc898902388f782485de02cbac Mon Sep 17 00:00:00 2001 From: Mattias Wadman <mattias.wadman@gmail.com> Date: Fri, 3 Feb 2017 05:10:13 +0100 Subject: [PATCH 0162/1696] [infoq] Add audio only format if available (#11565) * [infoq] Add audio only format if available Refactor cookie code into a function. Renamed formats to http_video, http_audio, rtmp_video Renamed extract functions to video instead of videos as they return one or no video. 
* [infoq] Rename to _extract_cookies as it more than one * [infoq] Remove redundant determine_ext * [infoq] Add comment about hardcoded URL * [infoq] Use _hidden_inputs instead of messy regex * [infoq] Probe if audio URL is valid Make it possible to pass headers to _is_valid_url * [infoq] Add audio only test --- youtube_dl/extractor/common.py | 4 +-- youtube_dl/extractor/infoq.py | 63 ++++++++++++++++++++++++++++------ 2 files changed, 55 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5a15a9536..2c8ec1417 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1025,13 +1025,13 @@ class InfoExtractor(object): unique_formats.append(f) formats[:] = unique_formats - def _is_valid_url(self, url, video_id, item='video'): + def _is_valid_url(self, url, video_id, item='video', headers={}): url = self._proto_relative_url(url, scheme='http:') # For now assume non HTTP(S) URLs always valid if not (url.startswith('http://') or url.startswith('https://')): return True try: - self._request_webpage(url, video_id, 'Checking %s URL' % item) + self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers) return True except ExtractorError as e: if isinstance(e.cause, compat_urllib_error.URLError): diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index cca0b8a93..9fb71e8ef 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import base64 -from ..compat import compat_urllib_parse_unquote +from ..compat import ( + compat_urllib_parse_unquote, + compat_urlparse, +) from ..utils import determine_ext from .bokecc import BokeCCBaseIE @@ -33,9 +36,21 @@ class InfoQIE(BokeCCBaseIE): 'ext': 'flv', 'description': 'md5:308d981fb28fa42f49f9568322c683ff', }, + }, { + 'url': 'https://www.infoq.com/presentations/Simple-Made-Easy', + 'md5': '0e34642d4d9ef44bf86f66f6399672db', + 'info_dict': { + 'id': 'Simple-Made-Easy', + 'title': 'Simple Made Easy', + 'ext': 'mp3', + 'description': 'md5:3e0e213a8bbd074796ef89ea35ada25b', + }, + 'params': { + 'format': 'bestaudio', + }, }] - def _extract_rtmp_videos(self, webpage): + def _extract_rtmp_video(self, webpage): # The server URL is hardcoded video_url = 'rtmpe://video.infoq.com/cfx/st/' @@ -47,28 +62,53 @@ class InfoQIE(BokeCCBaseIE): playpath = 'mp4:' + real_id return [{ - 'format_id': 'rtmp', + 'format_id': 'rtmp_video', 'url': video_url, 'ext': determine_ext(playpath), 'play_path': playpath, }] - def _extract_http_videos(self, webpage): - http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL') - + def _extract_cookies(self, webpage): policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy') signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature') key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id') + return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % ( + policy, signature, key_pair_id) + def _extract_http_video(self, webpage): + http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL') return [{ - 'format_id': 'http', + 'format_id': 'http_video', 'url': http_video_url, 'http_headers': { - 'Cookie': 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % ( - policy, signature, key_pair_id), + 'Cookie': self._extract_cookies(webpage) }, 
}] + def _extract_http_audio(self, webpage, video_id): + fields = self._hidden_inputs(webpage) + http_audio_url = fields['filename'] + if http_audio_url is None: + return [] + + cookies_header = {'Cookie': self._extract_cookies(webpage)} + + # base URL is found in the Location header in the response returned by + # GET https://www.infoq.com/mp3download.action?filename=... when logged in. + http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url) + + # audio file seem to be missing some times even if there is a download link + # so probe URL to make sure + if not self._is_valid_url(http_audio_url, video_id, headers=cookies_header): + return [] + + return [{ + 'format_id': 'http_audio', + 'url': http_audio_url, + 'vcodec': 'none', + 'http_headers': cookies_header, + }] + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -80,7 +120,10 @@ class InfoQIE(BokeCCBaseIE): # for China videos, HTTP video URL exists but always fails with 403 formats = self._extract_bokecc_formats(webpage, video_id) else: - formats = self._extract_rtmp_videos(webpage) + self._extract_http_videos(webpage) + formats = ( + self._extract_rtmp_video(webpage) + + self._extract_http_video(webpage) + + self._extract_http_audio(webpage, video_id)) self._sort_formats(formats) From d7f9242e301fa7c08542932c9348140cf2e07172 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 3 Feb 2017 12:13:24 +0800 Subject: [PATCH 0163/1696] [ChangeLog] Update after #11565 --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index c80126cfb..487ed3f0f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors ++ [infoq] Add audio only formats (#11565) * [youtube] Fix ytsearch when cookies are provided (#11924) + [bilibili] Support new Bangumi URLs (#11845) From 4ce3407d089ae8c34341e6d68267910683d4b500 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 3 Feb 2017 10:15:03 +0100 Subject: [PATCH 0164/1696] [filmon] improve extraction --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/filmon.py | 222 +++++++++++++++++------------ 2 files changed, 132 insertions(+), 95 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c9b9ebd23..e4ee43ee3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -287,7 +287,10 @@ from .fc2 import ( FC2EmbedIE, ) from .fczenit import FczenitIE -from .filmon import FilmOnIE, FilmOnVODIE +from .filmon import ( + FilmOnIE, + FilmOnChannelIE, +) from .firstpost import FirstpostIE from .firsttv import FirstTVIE from .fivemin import FiveMinIE diff --git a/youtube_dl/extractor/filmon.py b/youtube_dl/extractor/filmon.py index 987792fec..f775fe0ba 100644 --- a/youtube_dl/extractor/filmon.py +++ b/youtube_dl/extractor/filmon.py @@ -2,143 +2,177 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import qualities -from ..compat import compat_urllib_request - - -_QUALITY = qualities(('low', 'high')) +from ..compat import ( + compat_str, + compat_HTTPError, +) +from ..utils import ( + qualities, + strip_or_none, + int_or_none, + ExtractorError, +) class FilmOnIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?filmon\.com/(?:tv|channel)/(?P<id>[a-z0-9-]+)' + IE_NAME = 'filmon' + _VALID_URL = r'(?:https?://(?:www\.)?filmon\.com/vod/view/|filmon:)(?P<id>\d+)' _TESTS = [{ 
- 'url': 'https://www.filmon.com/channel/filmon-sports', - 'only_matching': True, + 'url': 'https://www.filmon.com/vod/view/24869-0-plan-9-from-outer-space', + 'info_dict': { + 'id': '24869', + 'ext': 'mp4', + 'title': 'Plan 9 From Outer Space', + 'description': 'Dead human, zombies and vampires', + }, }, { - 'url': 'https://www.filmon.com/tv/2894', - 'only_matching': True, + 'url': 'https://www.filmon.com/vod/view/2825-1-popeye-series-1', + 'info_dict': { + 'id': '2825', + 'title': 'Popeye Series 1', + 'description': 'The original series of Popeye.', + }, + 'playlist_mincount': 8, }] def _real_extract(self, url): - channel_id = self._match_id(url) + video_id = self._match_id(url) - request = compat_urllib_request.Request('https://www.filmon.com/channel/%s' % (channel_id)) - request.add_header('X-Requested-With', 'XMLHttpRequest') - channel_info = self._download_json(request, channel_id) - now_playing = channel_info['now_playing'] + try: + response = self._download_json( + 'https://www.filmon.com/api/vod/movie?id=%s' % video_id, + video_id)['response'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + errmsg = self._parse_json(e.cause.read().decode(), video_id)['reason'] + raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True) + raise - thumbnails = [] - for thumb in now_playing.get('images', ()): - if thumb['type'] != '2': - continue - thumbnails.append({ - 'url': thumb['url'], - 'width': int(thumb['width']), - 'height': int(thumb['height']), - }) + title = response['title'] + description = strip_or_none(response.get('description')) - formats = [] + if response.get('type_id') == 1: + entries = [self.url_result('filmon:' + episode_id) for episode_id in response.get('episodes', [])] + return self.playlist_result(entries, video_id, title, description) - for stream in channel_info['streams']: + QUALITY = qualities(('low', 'high')) + formats = [] + for format_id, stream in response.get('streams', {}).items(): + stream_url = stream.get('url') + if not stream_url: + continue formats.append({ - 'format_id': str(stream['id']), - # this is an m3u8 stream, but we are deliberately not using _extract_m3u8_formats - # because 0) it doesn't have bitrate variants anyway, and 1) the ids generated - # by that method are highly unstable (because the bitrate is variable) - 'url': stream['url'], - 'resolution': stream['name'], - 'format_note': 'expires after %u seconds' % int(stream['watch-timeout']), + 'format_id': format_id, + 'url': stream_url, 'ext': 'mp4', - 'quality': _QUALITY(stream['quality']), - 'preference': int(stream['watch-timeout']), + 'quality': QUALITY(stream.get('quality')), + 'protocol': 'm3u8_native', }) self._sort_formats(formats) + thumbnails = [] + poster = response.get('poster', {}) + thumbs = poster.get('thumbs', {}) + thumbs['poster'] = poster + for thumb_id, thumb in thumbs.items(): + thumb_url = thumb.get('url') + if not thumb_url: + continue + thumbnails.append({ + 'id': thumb_id, + 'url': thumb_url, + 'width': int_or_none(thumb.get('width')), + 'height': int_or_none(thumb.get('height')), + }) + return { - 'id': str(channel_info['id']), - 'display_id': channel_info['alias'], + 'id': video_id, + 'title': title, 'formats': formats, - # XXX: use the channel description (channel_info['description'])? - 'uploader_id': channel_info['alias'], - 'uploader': channel_info['title'], # XXX: kinda stretching it... 
- 'title': now_playing.get('programme_name') or channel_info['title'], - 'description': now_playing.get('programme_description'), + 'description': description, 'thumbnails': thumbnails, - 'is_live': True, } -class FilmOnVODIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?filmon\.com/vod/view/(?P<id>\d+)' +class FilmOnChannelIE(InfoExtractor): + IE_NAME = 'filmon:channel' + _VALID_URL = r'https?://(?:www\.)?filmon\.com/(?:tv|channel)/(?P<id>[a-z0-9-]+)' _TESTS = [{ - 'url': 'https://www.filmon.com/vod/view/24869-0-plan-9-from-outer-space', + # VOD + 'url': 'http://www.filmon.com/tv/sports-haters', 'info_dict': { - 'id': '24869', + 'id': '4190', 'ext': 'mp4', - 'title': 'Plan 9 From Outer Space', - 'description': 'Dead human, zombies and vampires', + 'title': 'Sports Haters', + 'description': 'md5:dabcb4c1d9cfc77085612f1a85f8275d', }, }, { - 'url': 'https://www.filmon.com/vod/view/2825-1-popeye-series-1', - 'info_dict': { - 'id': '2825', - 'title': 'Popeye Series 1', - }, - 'playlist_count': 8, + # LIVE + 'url': 'https://www.filmon.com/channel/filmon-sports', + 'only_matching': True, + }, { + 'url': 'https://www.filmon.com/tv/2894', + 'only_matching': True, }] - def _real_extract(self, url): - video_id = self._match_id(url) + _THUMBNAIL_RES = [ + ('logo', 56, 28), + ('big_logo', 106, 106), + ('extra_big_logo', 300, 300), + ] - result = self._download_json('https://www.filmon.com/api/vod/movie?id=%s' % (video_id), video_id) - if result['code'] != 200: - raise ExtractorError('FilmOn said: %s' % (result['reason']), expected=True) + def _real_extract(self, url): + channel_id = self._match_id(url) - response = result['response'] + try: + channel_data = self._download_json( + 'http://www.filmon.com/api-v2/channel/' + channel_id, channel_id)['data'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + errmsg = self._parse_json(e.cause.read().decode(), channel_id)['message'] + raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True) + raise - if response.get('episodes'): - return { - '_type': 'playlist', - 'id': video_id, - 'title': response['title'], - 'entries': [{ - '_type': 'url', - 'url': 'https://www.filmon.com/vod/view/%s' % (ep), - } for ep in response['episodes']] - } + channel_id = compat_str(channel_data['id']) + is_live = not channel_data.get('is_vod') and not channel_data.get('is_vox') + title = channel_data['title'] + QUALITY = qualities(('low', 'high')) formats = [] - for (id, stream) in response['streams'].items(): + for stream in channel_data.get('streams', []): + stream_url = stream.get('url') + if not stream_url: + continue + if not is_live: + formats.extend(self._extract_wowza_formats( + stream_url, channel_id, skip_protocols=['dash', 'rtmp', 'rtsp'])) + continue + quality = stream.get('quality') formats.append({ - 'format_id': id, - 'url': stream['url'], - 'resolution': stream['name'], - 'format_note': 'expires after %u seconds' % int(stream['watch-timeout']), + 'format_id': quality, + # this is an m3u8 stream, but we are deliberately not using _extract_m3u8_formats + # because it doesn't have bitrate variants anyway + 'url': stream_url, 'ext': 'mp4', - 'quality': _QUALITY(stream['quality']), - 'preference': int(stream['watch-timeout']), + 'quality': QUALITY(quality), }) self._sort_formats(formats) - poster = response['poster'] - thumbnails = [{ - 'id': 'poster', - 'url': poster['url'], - 'width': poster['width'], - 'height': poster['height'], - }] - for (id, thumb) in poster['thumbs'].items(): + thumbnails = [] + for name, width, 
height in self._THUMBNAIL_RES: thumbnails.append({ - 'id': id, - 'url': thumb['url'], - 'width': thumb['width'], - 'height': thumb['height'], + 'id': name, + 'url': 'http://static.filmon.com/assets/channels/%s/%s.png' % (channel_id, name), + 'width': width, + 'height': height, }) return { - 'id': video_id, - 'title': response['title'], - 'formats': formats, - 'description': response['description'], + 'id': channel_id, + 'display_id': channel_data.get('alias'), + 'title': self._live_title(title) if is_live else title, + 'description': channel_data.get('description'), 'thumbnails': thumbnails, + 'formats': formats, + 'is_live': is_live, } From daac118bf4e8bf3dc1ec202fe8b21b9319d15dbf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 3 Feb 2017 18:56:40 +0800 Subject: [PATCH 0165/1696] [ChangeLog] Update after #11901 --- ChangeLog | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ChangeLog b/ChangeLog index 487ed3f0f..947590b94 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,10 +1,14 @@ version <unreleased> +Core ++ Add --playlist-random to shuffle playlists (#11889, #11901) + Extractors + [infoq] Add audio only formats (#11565) * [youtube] Fix ytsearch when cookies are provided (#11924) + [bilibili] Support new Bangumi URLs (#11845) + version 2017.02.01 Extractors From f7a10d8cd6d1378d5f8e67b4b3572fa474b47cde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Feb 2017 21:25:44 +0700 Subject: [PATCH 0166/1696] [sportbox] Remove extractor (closes #11954) Covered by generic extractor --- youtube_dl/extractor/sportbox.py | 54 -------------------------------- 1 file changed, 54 deletions(-) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index b512cd20f..05a0b5a80 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -11,60 +11,6 @@ from ..utils import ( ) -class SportBoxIE(InfoExtractor): - _VALID_URL = r'https?://news\.sportbox\.ru/(?:[^/]+/)+spbvideo_NI\d+_(?P<display_id>.+)' - _TESTS = [{ - 'url': 'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S', - 'md5': 'ff56a598c2cf411a9a38a69709e97079', - 'info_dict': { - 'id': '80822', - 'ext': 'mp4', - 'title': 'Гонка 2 заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн', - 'description': 'md5:3d72dc4a006ab6805d82f037fdc637ad', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20140928', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://news.sportbox.ru/Vidy_sporta/billiard/spbvideo_NI486287_CHempionat-mira-po-dinamichnoy-piramide-4', - 'only_matching': True, - }, { - 'url': 'http://news.sportbox.ru/video/no_ads/spbvideo_NI536574_V_Novorossijske_proshel_detskij_turnir_Pole_slavy_bojevoj?ci=211355', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') - - webpage = self._download_webpage(url, display_id) - - player = self._search_regex( - r'src="/?(vdl/player/[^"]+)"', webpage, 'player') - - title = self._html_search_regex( - [r'"nodetitle"\s*:\s*"([^"]+)"', r'class="node-header_{1,2}title">([^<]+)'], - webpage, 'title') - description = self._og_search_description(webpage) or self._html_search_meta( - 'description', webpage, 'description') - thumbnail = self._og_search_thumbnail(webpage) - upload_date = unified_strdate(self._html_search_meta( - 'dateCreated', webpage, 'upload date')) - - return 
{ - '_type': 'url_transparent', - 'url': compat_urlparse.urljoin(url, '/%s' % player), - 'display_id': display_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - } - - class SportBoxEmbedIE(InfoExtractor): _VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)' _TESTS = [{ From b7cc5f078eca4d90b3e3d31d1247452953dba1fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Feb 2017 21:56:10 +0700 Subject: [PATCH 0167/1696] [extractors] Remove remnants of sportbox extractor (#11954) --- youtube_dl/extractor/extractors.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index aa235bec1..eaf3676df 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -888,10 +888,7 @@ from .spiegeltv import SpiegeltvIE from .spike import SpikeIE from .stitcher import StitcherIE from .sport5 import Sport5IE -from .sportbox import ( - SportBoxIE, - SportBoxEmbedIE, -) +from .sportbox import SportBoxEmbedIE from .sportdeutschland import SportDeutschlandIE from .sportschau import SportschauIE from .srgssr import ( From f962790ee53c634758021d9fc752ae476c6a142b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Feb 2017 21:56:48 +0700 Subject: [PATCH 0168/1696] [vine] Fix extraction (closes #11955) --- youtube_dl/extractor/vine.py | 107 +++++++++++++++-------------------- 1 file changed, 46 insertions(+), 61 deletions(-) diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 0183f052a..4957a07f7 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -6,8 +6,9 @@ import itertools from .common import InfoExtractor from ..utils import ( + determine_ext, int_or_none, - unified_strdate, + unified_timestamp, ) @@ -20,50 +21,16 @@ class VineIE(InfoExtractor): 'id': 'b9KOOWX7HUx', 'ext': 'mp4', 'title': 'Chicken.', - 'alt_title': 'Vine by Jack Dorsey', + 'alt_title': 'Vine by Jack', + 'timestamp': 1368997951, 'upload_date': '20130519', - 'uploader': 'Jack Dorsey', + 'uploader': 'Jack', 'uploader_id': '76', 'view_count': int, 'like_count': int, 'comment_count': int, 'repost_count': int, }, - }, { - 'url': 'https://vine.co/v/MYxVapFvz2z', - 'md5': '7b9a7cbc76734424ff942eb52c8f1065', - 'info_dict': { - 'id': 'MYxVapFvz2z', - 'ext': 'mp4', - 'title': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14', - 'alt_title': 'Vine by Mars Ruiz', - 'upload_date': '20140815', - 'uploader': 'Mars Ruiz', - 'uploader_id': '1102363502380728320', - 'view_count': int, - 'like_count': int, - 'comment_count': int, - 'repost_count': int, - }, - }, { - 'url': 'https://vine.co/v/bxVjBbZlPUH', - 'md5': 'ea27decea3fa670625aac92771a96b73', - 'info_dict': { - 'id': 'bxVjBbZlPUH', - 'ext': 'mp4', - 'title': '#mw3 #ac130 #killcam #angelofdeath', - 'alt_title': 'Vine by Z3k3', - 'upload_date': '20130430', - 'uploader': 'Z3k3', - 'uploader_id': '936470460173008896', - 'view_count': int, - 'like_count': int, - 'comment_count': int, - 'repost_count': int, - }, - }, { - 'url': 'https://vine.co/oembed/MYxVapFvz2z.json', - 'only_matching': True, }, { 'url': 'https://vine.co/v/e192BnZnZ9V', 'info_dict': { @@ -71,6 +38,7 @@ class VineIE(InfoExtractor): 'ext': 'mp4', 'title': 'ยิ้ม~ เขิน~ อาย~ น่าร้ากอ้ะ >//< @n_whitewo @orlameena #lovesicktheseries #lovesickseason2', 'alt_title': 
'Vine by Pimry_zaa', + 'timestamp': 1436057405, 'upload_date': '20150705', 'uploader': 'Pimry_zaa', 'uploader_id': '1135760698325307392', @@ -82,43 +50,60 @@ class VineIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://vine.co/v/MYxVapFvz2z', + 'only_matching': True, + }, { + 'url': 'https://vine.co/v/bxVjBbZlPUH', + 'only_matching': True, + }, { + 'url': 'https://vine.co/oembed/MYxVapFvz2z.json', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id) - - data = self._parse_json( - self._search_regex( - r'window\.POST_DATA\s*=\s*({.+?});\s*</script>', - webpage, 'vine data'), - video_id) - - data = data[list(data.keys())[0]] - - formats = [{ - 'format_id': '%(format)s-%(rate)s' % f, - 'vcodec': f.get('format'), - 'quality': f.get('rate'), - 'url': f['videoUrl'], - } for f in data['videoUrls'] if f.get('videoUrl')] + data = self._download_json( + 'https://archive.vine.co/posts/%s.json' % video_id, video_id) + + def video_url(kind): + for url_suffix in ('Url', 'URL'): + format_url = data.get('video%s%s' % (kind, url_suffix)) + if format_url: + return format_url + + formats = [] + for quality, format_id in enumerate(('low', '', 'dash')): + format_url = video_url(format_id.capitalize()) + if not format_url: + continue + # DASH link returns plain mp4 + if format_id == 'dash' and determine_ext(format_url) == 'mpd': + formats.extend(self._extract_mpd_formats( + format_url, video_id, mpd_id='dash', fatal=False)) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id or 'standard', + 'quality': quality, + }) self._sort_formats(formats) username = data.get('username') return { 'id': video_id, - 'title': data.get('description') or self._og_search_title(webpage), - 'alt_title': 'Vine by %s' % username if username else self._og_search_description(webpage, default=None), + 'title': data.get('description'), + 'alt_title': 'Vine by %s' % username if username else None, 'thumbnail': data.get('thumbnailUrl'), - 'upload_date': unified_strdate(data.get('created')), + 'timestamp': unified_timestamp(data.get('created')), 'uploader': username, 'uploader_id': data.get('userIdStr'), - 'view_count': int_or_none(data.get('loops', {}).get('count')), - 'like_count': int_or_none(data.get('likes', {}).get('count')), - 'comment_count': int_or_none(data.get('comments', {}).get('count')), - 'repost_count': int_or_none(data.get('reposts', {}).get('count')), + 'view_count': int_or_none(data.get('loops')), + 'like_count': int_or_none(data.get('likes')), + 'comment_count': int_or_none(data.get('comments')), + 'repost_count': int_or_none(data.get('reposts')), 'formats': formats, } From 605fd6392fedd2599115e1f1e12df2a6212df1ae Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 3 Feb 2017 17:59:48 +0100 Subject: [PATCH 0169/1696] [youtube] add format info for itag 325 and 328 --- youtube_dl/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f2f751104..76710931a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -329,6 +329,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'}, '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'}, '258': {'ext': 
'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'}, + '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'preference': -50, 'container': 'm4a_dash'}, + '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'preference': -50, 'container': 'm4a_dash'}, # Dash webm '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, From f65dba7cdb98bb5444ad5656c9626a15d210f6f6 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 3 Feb 2017 22:25:19 +0100 Subject: [PATCH 0170/1696] [myspace] fix extraction and extract hls and http formats --- youtube_dl/extractor/myspace.py | 108 +++++++++++++++++--------------- 1 file changed, 58 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/myspace.py b/youtube_dl/extractor/myspace.py index ab32e632e..f281238c9 100644 --- a/youtube_dl/extractor/myspace.py +++ b/youtube_dl/extractor/myspace.py @@ -17,9 +17,10 @@ class MySpaceIE(InfoExtractor): _TESTS = [ { 'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919', + 'md5': '9c1483c106f4a695c47d2911feed50a7', 'info_dict': { 'id': '109594919', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Little Big Town', 'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.', 'uploader': 'Five Minutes to the Stage', @@ -27,37 +28,30 @@ class MySpaceIE(InfoExtractor): 'timestamp': 1414108751, 'upload_date': '20141023', }, - 'params': { - # rtmp download - 'skip_download': True, - }, }, # songs { 'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681', + 'md5': '1d7ee4604a3da226dd69a123f748b262', 'info_dict': { 'id': '93388656', - 'ext': 'flv', + 'ext': 'm4a', 'title': 'Of weakened soul...', 'uploader': 'Killsorrow', 'uploader_id': 'killsorrow', }, - 'params': { - # rtmp download - 'skip_download': True, - }, }, { - 'add_ie': ['Vevo'], + 'add_ie': ['Youtube'], 'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041', 'info_dict': { - 'id': 'USZM20600099', - 'ext': 'mp4', - 'title': 'Animal I Have Become', - 'uploader': 'Three Days Grace', - 'timestamp': int, - 'upload_date': '20060502', + 'id': 'xqds0B_meys', + 'ext': 'webm', + 'title': 'Three Days Grace - Animal I Have Become', + 'description': 'md5:8bd86b3693e72a077cf863a8530c54bb', + 'uploader': 'ThreeDaysGraceVEVO', + 'uploader_id': 'ThreeDaysGraceVEVO', + 'upload_date': '20091002', }, - 'skip': 'VEVO is only available in some countries', }, { 'add_ie': ['Youtube'], 'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426', @@ -76,24 +70,46 @@ class MySpaceIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + is_song = mobj.group('mediatype').startswith('music/song') webpage = self._download_webpage(url, video_id) player_url = self._search_regex( - r'playerSwf":"([^"?]*)', webpage, 'player URL') + r'videoSwf":"([^"?]*)', webpage, 'player URL', fatal=False) - def rtmp_format_from_stream_url(stream_url, width=None, height=None): - rtmp_url, play_path = stream_url.split(';', 1) - return { - 'format_id': 'rtmp', - 'url': rtmp_url, - 'play_path': play_path, - 'player_url': player_url, - 'protocol': 'rtmp', - 'ext': 'flv', - 'width': width, - 'height': height, - } + def formats_from_stream_urls(stream_url, hls_stream_url, 
http_stream_url, width=None, height=None): + formats = [] + vcodec = 'none' if is_song else None + if hls_stream_url: + formats.append({ + 'format_id': 'hls', + 'url': hls_stream_url, + 'protocol': 'm3u8_native', + 'ext': 'm4a' if is_song else 'mp4', + 'vcodec': vcodec, + }) + if stream_url and player_url: + rtmp_url, play_path = stream_url.split(';', 1) + formats.append({ + 'format_id': 'rtmp', + 'url': rtmp_url, + 'play_path': play_path, + 'player_url': player_url, + 'protocol': 'rtmp', + 'ext': 'flv', + 'width': width, + 'height': height, + 'vcodec': vcodec, + }) + if http_stream_url: + formats.append({ + 'format_id': 'http', + 'url': http_stream_url, + 'width': width, + 'height': height, + 'vcodec': vcodec, + }) + return formats - if mobj.group('mediatype').startswith('music/song'): + if is_song: # songs don't store any useful info in the 'context' variable song_data = self._search_regex( r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id, @@ -108,8 +124,10 @@ class MySpaceIE(InfoExtractor): return self._search_regex( r'''data-%s=([\'"])(?P<data>.*?)\1''' % name, song_data, name, default='', group='data') - stream_url = search_data('stream-url') - if not stream_url: + formats = formats_from_stream_urls( + search_data('stream-url'), search_data('hls-stream-url'), + search_data('http-stream-url')) + if not formats: vevo_id = search_data('vevo-id') youtube_id = search_data('youtube-id') if vevo_id: @@ -121,6 +139,7 @@ class MySpaceIE(InfoExtractor): else: raise ExtractorError( 'Found song but don\'t know how to download it') + self._sort_formats(formats) return { 'id': video_id, 'title': self._og_search_title(webpage), @@ -128,27 +147,16 @@ class MySpaceIE(InfoExtractor): 'uploader_id': search_data('artist-username'), 'thumbnail': self._og_search_thumbnail(webpage), 'duration': int_or_none(search_data('duration')), - 'formats': [rtmp_format_from_stream_url(stream_url)] + 'formats': formats, } else: video = self._parse_json(self._search_regex( r'context = ({.*?});', webpage, 'context'), video_id)['video'] - formats = [] - hls_stream_url = video.get('hlsStreamUrl') - if hls_stream_url: - formats.append({ - 'format_id': 'hls', - 'url': hls_stream_url, - 'protocol': 'm3u8_native', - 'ext': 'mp4', - }) - stream_url = video.get('streamUrl') - if stream_url: - formats.append(rtmp_format_from_stream_url( - stream_url, - int_or_none(video.get('width')), - int_or_none(video.get('height')))) + formats = formats_from_stream_urls( + video.get('streamUrl'), video.get('hlsStreamUrl'), + video.get('mp4StreamUrl'), int_or_none(video.get('width')), + int_or_none(video.get('height'))) self._sort_formats(formats) return { 'id': video_id, From 2c15db829c1bd8311ed82e2884661271f0cf73ed Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 4 Feb 2017 08:38:28 +0100 Subject: [PATCH 0171/1696] [drtv] add support for live and radio sections(closes #1827)(closes #3427) --- youtube_dl/extractor/drtv.py | 74 +++++++++++++++++++++++++++--- youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 72 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 88d096b30..e966d7483 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -9,12 +9,13 @@ from ..utils import ( mimetype2ext, parse_iso8601, remove_end, + update_url_query, ) class DRTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)' - + _VALID_URL = 
r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)' + IE_NAME = 'drtv' _TESTS = [{ 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10', 'md5': '25e659cccc9a2ed956110a299fdf5983', @@ -79,9 +80,10 @@ class DRTVIE(InfoExtractor): subtitles = {} for asset in data['Assets']: - if asset.get('Kind') == 'Image': + kind = asset.get('Kind') + if kind == 'Image': thumbnail = asset.get('Uri') - elif asset.get('Kind') == 'VideoResource': + elif kind in ('VideoResource', 'AudioResource'): duration = float_or_none(asset.get('DurationInMilliseconds'), 1000) restricted_to_denmark = asset.get('RestrictedToDenmark') spoken_subtitles = asset.get('Target') == 'SpokenSubtitles' @@ -96,9 +98,13 @@ class DRTVIE(InfoExtractor): preference = -1 format_id += '-spoken-subtitles' if target == 'HDS': - formats.extend(self._extract_f4m_formats( + f4m_formats = self._extract_f4m_formats( uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43', - video_id, preference, f4m_id=format_id)) + video_id, preference, f4m_id=format_id) + if kind == 'AudioResource': + for f in f4m_formats: + f['vcodec'] = 'none' + formats.extend(f4m_formats) elif target == 'HLS': formats.extend(self._extract_m3u8_formats( uri, video_id, 'mp4', entry_protocol='m3u8_native', @@ -112,6 +118,7 @@ class DRTVIE(InfoExtractor): 'format_id': format_id, 'tbr': int_or_none(bitrate), 'ext': link.get('FileFormat'), + 'vcodec': 'none' if kind == 'AudioResource' else None, }) subtitles_list = asset.get('SubtitlesList') if isinstance(subtitles_list, list): @@ -144,3 +151,58 @@ class DRTVIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, } + + +class DRTVLiveIE(InfoExtractor): + IE_NAME = 'drtv:live' + _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)' + _TEST = { + 'url': 'https://www.dr.dk/tv/live/dr1', + 'info_dict': { + 'id': 'dr1', + 'ext': 'mp4', + 'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + channel_id = self._match_id(url) + channel_data = self._download_json( + 'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id, + channel_id) + title = self._live_title(channel_data['Title']) + + formats = [] + for streaming_server in channel_data.get('StreamingServers', []): + server = streaming_server.get('Server') + if not server: + continue + link_type = streaming_server.get('LinkType') + for quality in streaming_server.get('Qualities', []): + for stream in quality.get('Streams', []): + stream_path = stream.get('Stream') + if not stream_path: + continue + stream_url = update_url_query( + '%s/%s' % (server, stream_path), {'b': ''}) + if link_type == 'HLS': + formats.extend(self._extract_m3u8_formats( + stream_url, channel_id, 'mp4', + m3u8_id=link_type, fatal=False, live=True)) + elif link_type == 'HDS': + formats.extend(self._extract_f4m_formats(update_url_query( + '%s/%s' % (server, stream_path), {'hdcore': '3.7.0'}), + channel_id, f4m_id=link_type, fatal=False)) + self._sort_formats(formats) + + return { + 'id': channel_id, + 'title': title, + 'thumbnail': channel_data.get('PrimaryImageUri'), + 'formats': formats, + 'is_live': True, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index eaf3676df..32420937c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -248,7 +248,10 @@ from .dramafever import ( from .dreisat import DreiSatIE from .drbonanza 
import DRBonanzaIE from .drtuber import DrTuberIE -from .drtv import DRTVIE +from .drtv import ( + DRTVIE, + DRTVLiveIE, +) from .dvtv import DVTVIE from .dumpert import DumpertIE from .defense import DefenseGouvFrIE From 36fce54816eb1f1d792ac7ed4d07e292d44d62f5 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 4 Feb 2017 15:23:46 +0100 Subject: [PATCH 0172/1696] [turner] fix downloading of secure hls formats using ffmpeg(closes #11358)(closes #11373)(closes #11800) --- youtube_dl/downloader/external.py | 9 +++++++++ youtube_dl/extractor/turner.py | 8 ++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 138f353ef..41e37261d 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -199,6 +199,15 @@ class FFmpegFD(ExternalFD): args = [ffpp.executable, '-y'] + seekable = info_dict.get('_seekable') + if seekable is not None: + # setting -seekable prevents ffmpeg from guessing if the server + # supports seeking(by adding the header `Range: bytes=0-`), which + # can cause problems in some cases + # https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127 + # http://trac.ffmpeg.org/ticket/6125#comment:10 + args += ['-seekable', '1' if seekable else '0'] + args += self._configuration_args() # start_time = info_dict.get('start_time') or 0 diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index 57ffedb87..1c0be9fc6 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -100,9 +100,13 @@ class TurnerBaseIE(AdobePassIE): formats.extend(self._extract_smil_formats( video_url, video_id, fatal=False)) elif ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( + m3u8_formats = self._extract_m3u8_formats( video_url, video_id, 'mp4', - m3u8_id=format_id or 'hls', fatal=False)) + m3u8_id=format_id or 'hls', fatal=False) + if '/secure/' in video_url and '?hdnea=' in video_url: + for f in m3u8_formats: + f['_seekable'] = False + formats.extend(m3u8_formats) elif ext == 'f4m': formats.extend(self._extract_f4m_formats( update_url_query(video_url, {'hdcore': '3.7.0'}), From 643dc0fcfed5e5eb152000190d0c7ba9dd577ef8 Mon Sep 17 00:00:00 2001 From: A Connecticut Princess <bugchecker@dibaby.org> Date: Sat, 4 Feb 2017 13:23:35 +0500 Subject: [PATCH 0173/1696] [vk] Catch author blocked error message Example link (video in blocked group): https://vk.com/search?c%5Bq%5D=%D0%9F%D1%80%D1%8B%D0%B6%D0%BE%D0%BA%20c%20%D0%BA%D1%80%D0%B0%D0%BD%D0%B0%20%D0%B2%20%D1%81%D1%82%D0%B8%D0%BB%D0%B5%20%D0%A7%D0%B5%D0%BB%D0%BE%D0%B2%D0%B5%D0%BA%D0%B0-%D0%BF%D0%B0%D1%83%D0%BA%D0%B0&c%5Bsection%5D=video&c%5Bsort%5D=2&z=video-10639516_456240611 --- youtube_dl/extractor/vk.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 6e6c3a0e1..7c42a4f54 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -281,6 +281,11 @@ class VKIE(VKBaseIE): { 'url': 'http://new.vk.com/video205387401_165548505', 'only_matching': True, + }, + { + # This video is no longer available, because its author has been blocked. 
+ 'url': 'https://vk.com/video-10639516_456240611', + 'only_matching': True, } ] @@ -328,6 +333,12 @@ class VKIE(VKBaseIE): r'<!>Access denied': 'Access denied to video %s.', + + r'<!>Видеозапись недоступна, так как её автор был заблокирован.': + 'Video %s is no longer available, because its author has been blocked.', + + r'<!>This video is no longer available, because its author has been blocked.': + 'Video %s is no longer available, because its author has been blocked.', } for error_re, error_msg in ERRORS.items(): From c2521c1ac6bbd24cd5d01ba764f2d084b16c506f Mon Sep 17 00:00:00 2001 From: John Hawkinson <jhawk@mit.edu> Date: Sat, 4 Feb 2017 10:23:14 -0500 Subject: [PATCH 0174/1696] [Piksel] Add another app token regex --- youtube_dl/extractor/piksel.py | 43 ++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/piksel.py b/youtube_dl/extractor/piksel.py index d44edcdfb..c0c276a50 100644 --- a/youtube_dl/extractor/piksel.py +++ b/youtube_dl/extractor/piksel.py @@ -16,18 +16,33 @@ from ..utils import ( class PikselIE(InfoExtractor): _VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)' - _TEST = { - 'url': 'http://player.piksel.com/v/nv60p12f', - 'md5': 'd9c17bbe9c3386344f9cfd32fad8d235', - 'info_dict': { - 'id': 'nv60p12f', - 'ext': 'mp4', - 'title': 'فن الحياة - الحلقة 1', - 'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور', - 'timestamp': 1465231790, - 'upload_date': '20160606', + _TESTS = [ + { + 'url': 'http://player.piksel.com/v/nv60p12f', + 'md5': 'd9c17bbe9c3386344f9cfd32fad8d235', + 'info_dict': { + 'id': 'nv60p12f', + 'ext': 'mp4', + 'title': 'فن الحياة - الحلقة 1', + 'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور', + 'timestamp': 1465231790, + 'upload_date': '20160606', + } + }, + { + # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al + 'url': 'https://player.piksel.com/v/v80kqp41', + 'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d', + 'info_dict': { + 'id': 'v80kqp41', + 'ext': 'mp4', + 'title': 'WAW- State of Washington vs. Donald J. Trump, et al', + 'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. 
Robart presiding.', + 'timestamp': 1486171129, + 'upload_date': '20170204', + } } - } + ] @staticmethod def _extract_url(webpage): @@ -40,8 +55,10 @@ class PikselIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - app_token = self._search_regex( - r'clientAPI\s*:\s*"([^"]+)"', webpage, 'app token') + app_token = self._search_regex([ + r'clientAPI\s*:\s*"([^"]+)"', + r'data-de-api-key\s*=\s*"([^"]+)"' + ], webpage, 'app token') response = self._download_json( 'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token, video_id, query={ From 31487eb9746123b7c4e28be7e48908773beab40c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 22:57:48 +0700 Subject: [PATCH 0175/1696] release 2017.02.04 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 1 + docs/supportedsites.md | 7 +++++-- youtube_dl/version.py | 2 +- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 8914569b6..11fd56038 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.04*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.04** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.01 +[debug] youtube-dl version 2017.02.04 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 947590b94..5323769d8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.04 Core + Add --playlist-random to shuffle playlists (#11889, #11901) diff --git a/README.md b/README.md index 2ee00f515..89876bd7a 100644 --- a/README.md +++ b/README.md @@ -182,6 +182,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo automatically resized from an initial value of SIZE. 
--playlist-reverse Download playlist videos in reverse order + --playlist-random Download playlist videos in random order --xattr-set-filesize Set file xattribute ytdl.filesize with expected file size (experimental) --hls-prefer-native Use the native HLS downloader instead of diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d900f5e12..50a339bc4 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -84,6 +84,7 @@ - **bambuser:channel** - **Bandcamp** - **Bandcamp:album** + - **bangumi.bilibili.com**: BiliBili番剧 - **bbc**: BBC - **bbc.co.uk**: BBC iPlayer - **bbc.co.uk:article**: BBC articles @@ -211,7 +212,8 @@ - **DRBonanza** - **Dropbox** - **DrTuber** - - **DRTV** + - **drtv** + - **drtv:live** - **Dumpert** - **dvtv**: http://video.aktualne.cz/ - **dw** @@ -247,6 +249,8 @@ - **fc2:embed** - **Fczenit** - **fernsehkritik.tv** + - **filmon** + - **filmon:channel** - **Firstpost** - **FiveTV** - **Flickr** @@ -703,7 +707,6 @@ - **Spiegeltv** - **Spike** - **Sport5** - - **SportBox** - **SportBoxEmbed** - **SportDeutschland** - **Sportschau** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0f9b6b703..376b31397 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.01' +__version__ = '2017.02.04' From 8e4041cf3f8e769ee2188f3db4747b7133ab5c2d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 4 Feb 2017 17:02:12 +0100 Subject: [PATCH 0176/1696] [radiocanada] fix extraction for toutv rtmp formats --- youtube_dl/extractor/radiocanada.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 321917ad0..3b40002a8 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -54,9 +54,8 @@ class RadioCanadaIE(InfoExtractor): raise ExtractorError('This video is DRM protected.', expected=True) device_types = ['ipad'] - if app_code != 'toutv': - device_types.append('flash') if not smuggled_data: + device_types.append('flash') device_types.append('android') formats = [] @@ -103,7 +102,7 @@ class RadioCanadaIE(InfoExtractor): continue f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url) protocol = determine_protocol({'url': f_url}) - formats.append({ + f = { 'format_id': '%s-%d' % (protocol, tbr), 'url': f_url, 'ext': 'flv' if protocol == 'rtmp' else ext, @@ -111,7 +110,14 @@ class RadioCanadaIE(InfoExtractor): 'width': int_or_none(url_e.get('width')), 'height': int_or_none(url_e.get('height')), 'tbr': tbr, - }) + } + mobj = re.match(r'(?P<url>rtmp://[^/]+/[^/]+)/(?P<playpath>[^?]+)(?P<auth>\?.+)', f_url) + if mobj: + f.update({ + 'url': mobj.group('url') + mobj.group('auth'), + 'play_path': mobj.group('playpath'), + }) + formats.append(f) if protocol == 'rtsp': base_url = self._search_regex( r'rtsp://([^?]+)', f_url, 'base url', default=None) From 9db8f6c54021a9c809c8ae65a37544ad566ed159 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 23:21:07 +0700 Subject: [PATCH 0177/1696] [twitch:stream] Improve _VALID_URL (closes #11971) --- youtube_dl/extractor/twitch.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 1ca159a4d..bbba394b0 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -447,7 +447,14 @@ 
class TwitchHighlightsIE(TwitchVideosBaseIE): class TwitchStreamIE(TwitchBaseIE): IE_NAME = 'twitch:stream' - _VALID_URL = r'%s/(?P<id>[^/#?]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE + _VALID_URL = r'''(?x) + https?:// + (?: + (?:www\.)?twitch\.tv/| + player\.twitch\.tv/\?.*?\bchannel= + ) + (?P<id>[^/#?]+) + ''' _TESTS = [{ 'url': 'http://www.twitch.tv/shroomztv', @@ -471,8 +478,25 @@ class TwitchStreamIE(TwitchBaseIE): }, { 'url': 'http://www.twitch.tv/miracle_doto#profile-0', 'only_matching': True, + }, { + 'url': 'https://player.twitch.tv/?channel=lotsofs', + 'only_matching': True, }] + @classmethod + def suitable(cls, url): + return (False + if any(ie.suitable(url) for ie in ( + TwitchVideoIE, + TwitchChapterIE, + TwitchVodIE, + TwitchProfileIE, + TwitchAllVideosIE, + TwitchUploadsIE, + TwitchPastBroadcastsIE, + TwitchHighlightsIE)) + else super(TwitchStreamIE, cls).suitable(url)) + def _real_extract(self, url): channel_id = self._match_id(url) From 3144eccf551afc4c5e66e06de541c033e6f90681 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 23:22:28 +0700 Subject: [PATCH 0178/1696] [ChangeLog] Actualize --- ChangeLog | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 5323769d8..fe9cd3440 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,12 +1,38 @@ +version <unreleased> + +Extractors ++ [twitch:stream] Add support for player.twitch.tv (#11971) + + version 2017.02.04 Core + Add --playlist-random to shuffle playlists (#11889, #11901) +* [utils] Improve comments processing in js_to_json (#11947) +* [utils] Handle single-line comments in js_to_json +* [downloader/external:ffmpeg] Minimize the use of aac_adtstoasc filter Extractors ++ [piksel] Add another app token pattern (#11969) ++ [vk] Capture and output author blocked error message (#11965) ++ [turner] Fix secure HLS formats downloading with ffmpeg (#11358, #11373, + #11800) ++ [drtv] Add support for live and radio sections (#1827, #3427) +* [myspace] Fix extraction and extract HLS and HTTP formats ++ [youtube] Add format info for itag 325 and 328 +* [vine] Fix extraction (#11955) +- [sportbox] Remove extractor (#11954) ++ [filmon] Add support for filmon.com (#11187) + [infoq] Add audio only formats (#11565) +* [douyutv] Improve room id regular expression (#11931) +* [iprima] Fix extraction (#11920, #11896) * [youtube] Fix ytsearch when cookies are provided (#11924) +* [go] Relax video id regular expression (#11937) +* [facebook] Fix title extraction (#11941) ++ [youtube:playlist] Recognize TL playlists (#11945) + [bilibili] Support new Bangumi URLs (#11845) ++ [cbc:watch] Extract audio codec for audio only formats (#11893) ++ [elpais] Fix extraction for some URLs (#11765) version 2017.02.01 @@ -18,7 +44,6 @@ Extractors + [vimeo] Extract upload timestamp + [vimeo] Extract license (#8726, #11880) + [nrk:series] Add support for series (#11571, #11711) -+ [elpais] Fix extraction for some URLs (#11765) version 2017.01.31 From 7bccd5fc8ac35b1a3952522c0aa176c982f20206 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 23:23:38 +0700 Subject: [PATCH 0179/1696] [ChangeLog] Actualize --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index fe9cd3440..76be8dbd9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -2,6 +2,7 @@ version <unreleased> Extractors + [twitch:stream] Add support for player.twitch.tv (#11971) +* [radiocanada] Fix extraction 
for toutv rtmp formats version 2017.02.04 From a713a86755ba864a7b765fd2ce9a5ac8a8f4cc63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 23:26:39 +0700 Subject: [PATCH 0180/1696] release 2017.02.04.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 11fd56038..15e7d4944 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.04*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.04** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.04.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.04.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.04 +[debug] youtube-dl version 2017.02.04.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 76be8dbd9..23a729559 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.04.1 Extractors + [twitch:stream] Add support for player.twitch.tv (#11971) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 376b31397..5dde47a26 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.04' +__version__ = '2017.02.04.1' From 3d2c2752c5cd70fc7f9cebe8c4683a1de626017d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 4 Feb 2017 18:18:03 +0100 Subject: [PATCH 0181/1696] [afreecatv] extract rtmp formats --- youtube_dl/extractor/afreecatv.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 4f6cdb8a2..e0a0f7c57 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -221,10 +221,23 @@ class AfreecaTVGlobalIE(AfreecaTVIE): s_url = s.get('purl') if not s_url: continue - # TODO: extract rtmp formats - if s.get('stype') == 'HLS': + stype = s.get('stype') + if stype == 'HLS': formats.extend(self._extract_m3u8_formats( - s_url, channel_id, 'mp4', fatal=False)) + s_url, channel_id, 'mp4', m3u8_id=stype, fatal=False)) + elif stype == 'RTMP': + format_id = [stype] + label = s.get('label') + if label: + 
format_id.append(label) + formats.append({ + 'format_id': '-'.join(format_id), + 'url': s_url, + 'tbr': int_or_none(s.get('bps')), + 'height': int_or_none(s.get('brt')), + 'ext': 'flv', + 'rtmp_live': True, + }) self._sort_formats(formats) info.update({ From 49bd8d5e2e5c4de8c1c409adffc557cb198f7eee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Feb 2017 02:41:22 +0700 Subject: [PATCH 0182/1696] [travis] Add python 3.6 --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index c74c9cc12..4833c76e9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,7 @@ python: - "3.3" - "3.4" - "3.5" + - "3.6" sudo: false script: nosetests test --verbose notifications: From 6fd138bed892ac8ae1714d64f4a53d8ea7a1d5bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Feb 2017 13:36:52 +0700 Subject: [PATCH 0183/1696] [sportbox] PEP 8 --- youtube_dl/extractor/sportbox.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index 05a0b5a80..e7bd5bf91 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -4,11 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - js_to_json, - unified_strdate, -) +from ..utils import js_to_json class SportBoxEmbedIE(InfoExtractor): From 6ef3e65a7b244d5e432e764772177c7d48cab237 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Feb 2017 13:37:27 +0700 Subject: [PATCH 0184/1696] [videopress] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 22 +++++++ youtube_dl/extractor/videopress.py | 99 ++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+) create mode 100644 youtube_dl/extractor/videopress.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 32420937c..cf608faee 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1095,6 +1095,7 @@ from .videomore import ( VideomoreSeasonIE, ) from .videopremium import VideoPremiumIE +from .videopress import VideoPressIE from .vidio import VidioIE from .vidme import ( VidmeIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a23486620..4156cf27d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -81,6 +81,7 @@ from .videa import VideaIE from .twentymin import TwentyMinutenIE from .ustream import UstreamIE from .openload import OpenloadIE +from .videopress import VideoPressIE class GenericIE(InfoExtractor): @@ -1473,6 +1474,21 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, 'add_ie': [TwentyMinutenIE.ie_key()], + }, + { + # VideoPress embed + 'url': 'https://en.support.wordpress.com/videopress/', + 'info_dict': { + 'id': 'OcobLTqC', + 'ext': 'm4v', + 'title': 'IMG_5786', + 'timestamp': 1435711927, + 'upload_date': '20150701', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [VideoPressIE.ie_key()], } # { # # TODO: find another test @@ -2438,6 +2454,12 @@ class GenericIE(InfoExtractor): return _playlist_from_matches( openload_urls, ie=OpenloadIE.ie_key()) + # Look for VideoPress embeds + videopress_urls = VideoPressIE._extract_urls(webpage) + if videopress_urls: + return _playlist_from_matches( + videopress_urls, 
ie=VideoPressIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') diff --git a/youtube_dl/extractor/videopress.py b/youtube_dl/extractor/videopress.py new file mode 100644 index 000000000..049db25a5 --- /dev/null +++ b/youtube_dl/extractor/videopress.py @@ -0,0 +1,99 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import random +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + determine_ext, + float_or_none, + parse_age_limit, + qualities, + try_get, + unified_timestamp, + urljoin, +) + + +class VideoPressIE(InfoExtractor): + _VALID_URL = r'https?://videopress\.com/embed/(?P<id>[\da-zA-Z]+)' + _TESTS = [{ + 'url': 'https://videopress.com/embed/kUJmAcSf', + 'md5': '706956a6c875873d51010921310e4bc6', + 'info_dict': { + 'id': 'kUJmAcSf', + 'ext': 'mp4', + 'title': 'VideoPress Demo', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 634.6, + 'timestamp': 1434983935, + 'upload_date': '20150622', + 'age_limit': 0, + }, + }, { + # 17+, requires birth_* params + 'url': 'https://videopress.com/embed/iH3gstfZ', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe[^>]+src=["\']((?:https?://)?videopress\.com/embed/[\da-zA-Z]+)', + webpage) + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_json( + 'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id, + video_id, query={ + 'birth_month': random.randint(1, 12), + 'birth_day': random.randint(1, 31), + 'birth_year': random.randint(1950, 1995), + }) + + title = video['title'] + + def base_url(scheme): + return try_get( + video, lambda x: x['file_url_base'][scheme], compat_str) + + base_url = base_url('https') or base_url('http') + + QUALITIES = ('std', 'dvd', 'hd') + quality = qualities(QUALITIES) + + formats = [] + for format_id, f in video['files'].items(): + if not isinstance(f, dict): + continue + for ext, path in f.items(): + if ext in ('mp4', 'ogg'): + formats.append({ + 'url': urljoin(base_url, path), + 'format_id': '%s-%s' % (format_id, ext), + 'ext': determine_ext(path, ext), + 'quality': quality(format_id), + }) + original_url = try_get(video, lambda x: x['original'], compat_str) + if original_url: + formats.append({ + 'url': original_url, + 'format_id': 'original', + 'quality': len(QUALITIES), + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': video.get('description'), + 'thumbnail': video.get('poster'), + 'duration': float_or_none(video.get('duration'), 1000), + 'timestamp': unified_timestamp(video.get('upload_date')), + 'age_limit': parse_age_limit(video.get('rating')), + 'formats': formats, + } From e4e50f60b1040a4b6aa8ecb9139f7d5de195f407 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 5 Feb 2017 21:41:08 +0800 Subject: [PATCH 0185/1696] [googledrive] Fix extraction on Python 3.6 Since Python 3.6, invalid escape sequences are deprecated. It's likely that there are invalid escape sequences somewhere on the webpage, so instead of unescaping the whole webpage, just unescape the URL. See https://bugs.python.org/issue27364. That change was designed for string literals, while it affects the 'unicode_escape' encoding as well. 
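A minimal sketch of the failure mode (assuming CPython 3.6+, with the warning
filter set to 'error' so the new DeprecationWarning surfaces as a hard failure;
the JSON snippet below is illustrative, not taken from an actual Google Drive page):

    import codecs
    import warnings

    warnings.simplefilter('error')  # make the Python 3.6 DeprecationWarning fatal
    # JSON-style escaped slashes are not valid Python escape sequences
    page = b'{"src": "https:\\/\\/docs.google.com\\/uc?id=x"}'
    codecs.decode(page, 'unicode_escape')  # 3.6+: DeprecationWarning (fatal here) for the invalid '\/' escape

Hence the patch runs only the extracted format URL through
youtube_dl.utils.lowercase_escape(), which decodes just the \uNNNN escapes that
actually occur there instead of pushing the whole page through the
'unicode_escape' codec.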
The code path is: str.decode('unicode_escape') codecs.unicode_escape_decode() PyUnicode_DecodeUnicodeEscape() --- ChangeLog | 6 ++++++ youtube_dl/extractor/googledrive.py | 9 +++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 23a729559..a0025ab91 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [googledrive] Fix extraction on Python 3.6 + + version 2017.02.04.1 Extractors diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index 766fc26d0..fec36cbbb 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, + lowercase_escape, ) @@ -13,12 +14,12 @@ class GoogleDriveIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})' _TESTS = [{ 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1', - 'md5': '881f7700aec4f538571fa1e0eed4a7b6', + 'md5': 'd109872761f7e7ecf353fa108c0dbe1e', 'info_dict': { 'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ', 'ext': 'mp4', 'title': 'Big Buck Bunny.mp4', - 'duration': 46, + 'duration': 45, } }, { # video id is longer than 28 characters @@ -55,7 +56,7 @@ class GoogleDriveIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape') + 'http://docs.google.com/file/d/%s' % video_id, video_id) reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None) if reason: @@ -74,7 +75,7 @@ class GoogleDriveIE(InfoExtractor): resolution = fmt.split('/')[1] width, height = resolution.split('x') formats.append({ - 'url': fmt_url, + 'url': lowercase_escape(fmt_url), 'format_id': fmt_id, 'resolution': resolution, 'width': int_or_none(width), From caf0f5f8b7d0854caaf6778fe3a646ee0d7668fe Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 5 Feb 2017 21:48:13 +0800 Subject: [PATCH 0186/1696] [iwara] Fix extraction (closes #11781) --- ChangeLog | 1 + youtube_dl/extractor/iwara.py | 41 +++++++++++++++++++++++++---------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/ChangeLog b/ChangeLog index a0025ab91..77286dbef 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [iwara] Fix extraction (#11781) * [googledrive] Fix extraction on Python 3.6 diff --git a/youtube_dl/extractor/iwara.py b/youtube_dl/extractor/iwara.py index 8d7e7f472..011274b02 100644 --- a/youtube_dl/extractor/iwara.py +++ b/youtube_dl/extractor/iwara.py @@ -3,14 +3,18 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse -from ..utils import remove_end +from ..utils import ( + int_or_none, + mimetype2ext, + remove_end, +) class IwaraIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P<id>[a-zA-Z0-9]+)' _TESTS = [{ 'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD', - 'md5': '1d53866b2c514b23ed69e4352fdc9839', + # md5 is unstable 'info_dict': { 'id': 'amVwUl1EHpAD9RD', 'ext': 'mp4', @@ -23,17 +27,17 @@ class IwaraIE(InfoExtractor): 'info_dict': { 'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc', 'ext': 'mp4', - 'title': '[3D Hentai] Kyonyu Ã\x97 Genkai Ã\x97 Emaki Shinobi Girls.mp4', + 'title': '[3D Hentai] 
Kyonyu × Genkai × Emaki Shinobi Girls.mp4', 'age_limit': 18, }, 'add_ie': ['GoogleDrive'], }, { 'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq', - 'md5': '1d85f1e5217d2791626cff5ec83bb189', + # md5 is unstable 'info_dict': { 'id': '6liAP9s2Ojc', 'ext': 'mp4', - 'age_limit': 0, + 'age_limit': 18, 'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)', 'description': 'md5:590c12c0df1443d833fbebe05da8c47a', 'upload_date': '20160910', @@ -52,9 +56,9 @@ class IwaraIE(InfoExtractor): # ecchi is 'sexy' in Japanese age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0 - entries = self._parse_html5_media_entries(url, webpage, video_id) + video_data = self._download_json('http://www.iwara.tv/api/video/%s' % video_id, video_id) - if not entries: + if not video_data: iframe_url = self._html_search_regex( r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1', webpage, 'iframe URL', group='url') @@ -67,11 +71,24 @@ class IwaraIE(InfoExtractor): title = remove_end(self._html_search_regex( r'<title>([^<]+)', webpage, 'title'), ' | Iwara') - info_dict = entries[0] - info_dict.update({ + formats = [] + for a_format in video_data: + format_id = a_format.get('resolution') + height = int_or_none(self._search_regex( + r'(\d+)p', format_id, 'height', default=None)) + formats.append({ + 'url': a_format['uri'], + 'format_id': format_id, + 'ext': mimetype2ext(a_format.get('mime')) or 'mp4', + 'height': height, + 'quality': 1 if format_id == 'Source' else 0, + }) + + self._sort_formats(formats) + + return { 'id': video_id, 'title': title, 'age_limit': age_limit, - }) - - return info_dict + 'formats': formats, + } From 2ab2c0d1f53f66614eda4fefb042e851e78097f0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 5 Feb 2017 22:30:13 +0800 Subject: [PATCH 0187/1696] [iwara] Add width (closes #11724) The heuristic is from #11724 --- youtube_dl/extractor/iwara.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/iwara.py b/youtube_dl/extractor/iwara.py index 011274b02..a7514fc80 100644 --- a/youtube_dl/extractor/iwara.py +++ b/youtube_dl/extractor/iwara.py @@ -81,6 +81,7 @@ class IwaraIE(InfoExtractor): 'format_id': format_id, 'ext': mimetype2ext(a_format.get('mime')) or 'mp4', 'height': height, + 'width': int_or_none(height / 9.0 * 16.0 if height else None), 'quality': 1 if format_id == 'Source' else 0, }) From 019f4c03717bfd2b887309e5a4c96ea82cbedf34 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 5 Feb 2017 22:47:04 +0800 Subject: [PATCH 0188/1696] [bandcamp] Fix extraction for incomplete albums Closes #11727 --- ChangeLog | 1 + youtube_dl/extractor/bandcamp.py | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 77286dbef..984191925 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [bandcamp] Fix extraction for incomplete albums (#11727) * [iwara] Fix extraction (#11781) * [googledrive] Fix extraction on Python 3.6 diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 88c590e98..056e06376 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -209,6 +209,15 @@ class BandcampAlbumIE(InfoExtractor): 'id': 'entropy-ep', }, 'playlist_mincount': 3, + }, { + # not all tracks have songs + 'url': 'https://insulters.bandcamp.com/album/we-are-the-plague', + 'info_dict': { + 'id': 'we-are-the-plague', + 'title': 'WE ARE THE PLAGUE', + 'uploader_id': 'insulters', + }, + 'playlist_count': 2, }] def _real_extract(self, url): @@ 
-217,12 +226,16 @@ class BandcampAlbumIE(InfoExtractor): album_id = mobj.group('album_id') playlist_id = album_id or uploader_id webpage = self._download_webpage(url, playlist_id) - tracks_paths = re.findall(r'

', - webpage, 'title') + m3u8_url = ej_links.get('HLSLink') + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native')) - video_id = self._search_regex( - r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id) + mp4_url = ej_links.get('MP4Link') + if mp4_url: + formats.append({ + 'url': mp4_url, + }) - m3u8_url = self._download_webpage( - 'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/' - % video_id, video_id, headers={'Referer': url}) - formats = self._extract_m3u8_formats( - m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native') + self._sort_formats(formats) - description = self._html_search_meta('description', webpage) + description = get_elements_by_class('synopsis', webpage)[0] thumbnail = self._html_search_regex( - r'''''', - webpage, "thumbnail url", fatal=False) + r''']+src=(["'])(?P(?!\1).+?/moviecovers/(?!\1).+?)\1''', + webpage, 'thumbnail url', fatal=False, group='url') if thumbnail is not None: - thumbnail = compat_urlparse.urljoin(url, remove_start(thumbnail, '..')) + thumbnail = compat_urlparse.urljoin(url, thumbnail) return { 'id': video_id, From 0dac7cbb092c804f1548c4a60f15ac29a7db06b9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 12 Feb 2017 17:24:45 +0100 Subject: [PATCH 0237/1696] [hotstar] improve extraction(closes #12096) - extract all qualities - detect drm protected videos - extract more metadata --- youtube_dl/extractor/hotstar.py | 46 +++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index f05d765d6..3a7a66a34 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -34,11 +34,9 @@ class HotStarIE(InfoExtractor): 'only_matching': True, }] - _GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s' - _GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s' - - def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True): - json_data = super(HotStarIE, self)._download_json(url_or_request, video_id, note, fatal=fatal) + def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None): + json_data = super(HotStarIE, self)._download_json( + url_or_request, video_id, note, fatal=fatal, query=query) if json_data['resultCode'] != 'OK': if fatal: raise ExtractorError(json_data['errorDescription']) @@ -48,20 +46,37 @@ class HotStarIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) video_data = self._download_json( - self._GET_CONTENT_TEMPLATE % video_id, - video_id)['contentInfo'][0] + 'http://account.hotstar.com/AVS/besc', video_id, query={ + 'action': 'GetAggregatedContentDetails', + 'channel': 'PCTV', + 'contentId': video_id, + })['contentInfo'][0] + title = video_data['episodeTitle'] + + if video_data.get('encrypted') == 'Y': + raise ExtractorError('This video is DRM protected.', expected=True) formats = [] - # PCTV for extracting f4m manifest - for f in ('TABLET',): + for f in ('JIO',): format_data = self._download_json( - self._GET_CDN_TEMPLATE % (f, video_id, 'VOD'), - video_id, 'Downloading %s JSON metadata' % f, fatal=False) + 'http://getcdn.hotstar.com/AVS/besc', + video_id, 'Downloading %s JSON metadata' % f, + fatal=False, query={ + 'action': 'GetCDN', + 'asJson': 'Y', + 
'channel': f, + 'id': video_id, + 'type': 'VOD', + }) if format_data: - format_url = format_data['src'] + format_url = format_data.get('src') + if not format_url: + continue ext = determine_ext(format_url) if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', + m3u8_id='hls', fatal=False)) elif ext == 'f4m': # produce broken files continue @@ -75,9 +90,12 @@ class HotStarIE(InfoExtractor): return { 'id': video_id, - 'title': video_data['episodeTitle'], + 'title': title, 'description': video_data.get('description'), 'duration': int_or_none(video_data.get('duration')), 'timestamp': int_or_none(video_data.get('broadcastDate')), 'formats': formats, + 'episode': title, + 'episode_number': int_or_none(video_data.get('episodeNumber')), + 'series': video_data.get('contentTitle'), } From 1e2c3f61fc952620a52a8a3a79bcd1a6f7d8ecae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Feb 2017 03:33:23 +0700 Subject: [PATCH 0238/1696] [travis] Separate builds for core and download --- .travis.yml | 7 ++++++- devscripts/run_tests.sh | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 devscripts/run_tests.sh diff --git a/.travis.yml b/.travis.yml index 4833c76e9..8ba93ec02 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,12 @@ python: - "3.5" - "3.6" sudo: false -script: nosetests test --verbose +env: + - YTDL_TEST_SET=core + - YTDL_TEST_SET=download +before_script: + - chmod +x ./devscripts/run_tests.sh +script: ./devscripts/run_tests.sh notifications: email: - filippo.valsorda@gmail.com diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh new file mode 100644 index 000000000..7f4c1e083 --- /dev/null +++ b/devscripts/run_tests.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter" + +test_set="" + +case "$YTDL_TEST_SET" in + core) + test_set="-I test_($DOWNLOAD_TESTS)\.py" + ;; + download) + test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py" + ;; + *) + break + ;; +esac + +nosetests test --verbose $test_set From 9dad94185367cdfde0de21cd8e595094cbe31acc Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 13 Feb 2017 11:43:20 +0100 Subject: [PATCH 0239/1696] [disney] improve extraction - add support for more urls - detect expired videos - skip Adobe Flash Access protected videos closes #4975 closes #11000 closes #11882 closes #11936 --- youtube_dl/extractor/disney.py | 60 ++++++++++++++++++++++++++++----- youtube_dl/extractor/generic.py | 13 ------- 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/disney.py b/youtube_dl/extractor/disney.py index 396873c6d..939d1338c 100644 --- a/youtube_dl/extractor/disney.py +++ b/youtube_dl/extractor/disney.py @@ -9,13 +9,15 @@ from ..utils import ( unified_strdate, compat_str, determine_ext, + ExtractorError, ) class DisneyIE(InfoExtractor): _VALID_URL = r'''(?x) - https?://(?P(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|starwars\.com))/(?:embed/|(?:[^/]+/)+[\w-]+-)(?P[a-z0-9]{24})''' + https?://(?P(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P[a-z0-9]{24})|(?:[^/]+/)?(?P[^/?#]+))''' _TESTS = [{ + # Disney.EmbedVideo 'url': 
'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977', 'info_dict': { 'id': '545ed1857afee5a0ec239977', @@ -28,6 +30,20 @@ class DisneyIE(InfoExtractor): # m3u8 download 'skip_download': True, } + }, { + # Grill.burger + 'url': 'http://www.starwars.com/video/rogue-one-a-star-wars-story-intro-featurette', + 'info_dict': { + 'id': '5454e9f4e9804a552e3524c8', + 'ext': 'mp4', + 'title': '"Intro" Featurette: Rogue One: A Star Wars Story', + 'upload_date': '20170104', + 'description': 'Go behind-the-scenes of Rogue One: A Star Wars Story in this featurette with Director Gareth Edwards and the cast of the film.', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } }, { 'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2', 'only_matching': True, @@ -43,31 +59,55 @@ class DisneyIE(InfoExtractor): }, { 'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097', 'only_matching': True, + }, { + 'url': 'http://spiderman.marvelkids.com/embed/522900d2ced3c565e4cc0677', + 'only_matching': True, + }, { + 'url': 'http://spiderman.marvelkids.com/videos/contest-of-champions-part-four-clip-1', + 'only_matching': True, + }, { + 'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo', + 'only_matching': True, + }, { + 'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue', + 'only_matching': True, }] def _real_extract(self, url): - domain, video_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage( - 'http://%s/embed/%s' % (domain, video_id), video_id) - video_data = self._parse_json(self._search_regex( - r'Disney\.EmbedVideo=({.+});', webpage, 'embed data'), video_id)['video'] + domain, video_id, display_id = re.match(self._VALID_URL, url).groups() + if not video_id: + webpage = self._download_webpage(url, display_id) + grill = re.sub(r'"\s*\+\s*"', '', self._search_regex( + r'Grill\.burger\s*=\s*({.+})\s*:', + webpage, 'grill data')) + page_data = next(s for s in self._parse_json(grill, display_id)['stack'] if s.get('type') == 'video') + video_data = page_data['data'][0] + else: + webpage = self._download_webpage( + 'http://%s/embed/%s' % (domain, video_id), video_id) + page_data = self._parse_json(self._search_regex( + r'Disney\.EmbedVideo\s*=\s*({.+});', + webpage, 'embed data'), video_id) + video_data = page_data['video'] for external in video_data.get('externals', []): if external.get('source') == 'vevo': return self.url_result('vevo:' + external['data_id'], 'Vevo') + video_id = video_data['id'] title = video_data['title'] formats = [] for flavor in video_data.get('flavors', []): flavor_format = flavor.get('format') flavor_url = flavor.get('url') - if not flavor_url or not re.match(r'https?://', flavor_url): + if not flavor_url or not re.match(r'https?://', flavor_url) or flavor_format == 'mp4_access': continue tbr = int_or_none(flavor.get('bitrate')) if tbr == 99999: formats.extend(self._extract_m3u8_formats( - flavor_url, video_id, 'mp4', m3u8_id=flavor_format, fatal=False)) + flavor_url, video_id, 'mp4', + m3u8_id=flavor_format, fatal=False)) continue format_id = [] if flavor_format: @@ -88,6 +128,10 @@ class DisneyIE(InfoExtractor): 'ext': ext, 'vcodec': 'none' if (width == 0 and height == 0) else None, }) + if not formats and video_data.get('expired'): + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']), + expected=True) self._sort_formats(formats) subtitles = 
{} diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1c233f038..494cc3c84 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -991,19 +991,6 @@ class GenericIE(InfoExtractor): 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014', }, }, - # Kaltura embed protected with referrer - { - 'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero', - 'info_dict': { - 'id': '1_g4fbemnq', - 'ext': 'mp4', - 'title': 'Violetta - Achter De Schermen - Ruggero', - 'description': 'Achter de schermen met Ruggero', - 'timestamp': 1435133761, - 'upload_date': '20150624', - 'uploader_id': 'echojecka', - }, - }, # Kaltura embed with single quotes { 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY', From 1de9f78e71214e130b5882662cdcd716b737e6ca Mon Sep 17 00:00:00 2001 From: Sergey M Date: Mon, 13 Feb 2017 18:56:05 +0800 Subject: [PATCH 0240/1696] [travis] Separate builds for core and download --- .travis.yml | 7 ++++++- devscripts/run_tests.sh | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 devscripts/run_tests.sh diff --git a/.travis.yml b/.travis.yml index 4833c76e9..8ba93ec02 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,12 @@ python: - "3.5" - "3.6" sudo: false -script: nosetests test --verbose +env: + - YTDL_TEST_SET=core + - YTDL_TEST_SET=download +before_script: + - chmod +x ./devscripts/run_tests.sh +script: ./devscripts/run_tests.sh notifications: email: - filippo.valsorda@gmail.com diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh new file mode 100644 index 000000000..7f4c1e083 --- /dev/null +++ b/devscripts/run_tests.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter" + +test_set="" + +case "$YTDL_TEST_SET" in + core) + test_set="-I test_($DOWNLOAD_TESTS)\.py" + ;; + download) + test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py" + ;; + *) + break + ;; +esac + +nosetests test --verbose $test_set From 454e5cdb17dd4e77f3d387045b083f3d3ed61ae0 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 13 Feb 2017 14:28:30 +0100 Subject: [PATCH 0241/1696] [limelight] add support referer protected videos --- youtube_dl/extractor/generic.py | 9 ++++++--- youtube_dl/extractor/limelight.py | 23 ++++++++++++++++------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 494cc3c84..a2b0298ec 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2337,8 +2337,9 @@ class GenericIE(InfoExtractor): 'Channel': 'channel', 'ChannelList': 'channel_list', } - return self.url_result('limelight:%s:%s' % ( - lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2)) + return self.url_result(smuggle_url('limelight:%s:%s' % ( + lm[mobj.group(1)], mobj.group(2)), {'source_url': url}), + 'Limelight%s' % mobj.group(1), mobj.group(2)) mobj = re.search( r'''(?sx) @@ -2348,7 +2349,9 @@ class GenericIE(InfoExtractor): value=(["\'])(?:(?!\3).)*mediaId=(?P[a-z0-9]{32}) ''', webpage) if mobj: - return self.url_result('limelight:media:%s' % mobj.group('id')) + return self.url_result(smuggle_url( + 'limelight:media:%s' % mobj.group('id'), + {'source_url': url}), 'LimelightMedia', mobj.group('id')) # Look for AdobeTVVideo embeds mobj = re.search( diff --git a/youtube_dl/extractor/limelight.py 
b/youtube_dl/extractor/limelight.py index e635f3c4d..a3712665b 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -8,6 +8,7 @@ from ..utils import ( determine_ext, float_or_none, int_or_none, + unsmuggle_url, ) @@ -15,20 +16,23 @@ class LimelightBaseIE(InfoExtractor): _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s' _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json' - def _call_playlist_service(self, item_id, method, fatal=True): + def _call_playlist_service(self, item_id, method, fatal=True, referer=None): + headers = {} + if referer: + headers['Referer'] = referer return self._download_json( self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method), - item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal) + item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers) def _call_api(self, organization_id, item_id, method): return self._download_json( self._API_URL % (organization_id, self._API_PATH, item_id, method), item_id, 'Downloading API %s JSON' % method) - def _extract(self, item_id, pc_method, mobile_method, meta_method): - pc = self._call_playlist_service(item_id, pc_method) + def _extract(self, item_id, pc_method, mobile_method, meta_method, referer=None): + pc = self._call_playlist_service(item_id, pc_method, referer=referer) metadata = self._call_api(pc['orgId'], item_id, meta_method) - mobile = self._call_playlist_service(item_id, mobile_method, fatal=False) + mobile = self._call_playlist_service(item_id, mobile_method, fatal=False, referer=referer) return pc, mobile, metadata def _extract_info(self, streams, mobile_urls, properties): @@ -207,10 +211,13 @@ class LimelightMediaIE(LimelightBaseIE): _API_PATH = 'media' def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) pc, mobile, metadata = self._extract( - video_id, 'getPlaylistByMediaId', 'getMobilePlaylistByMediaId', 'properties') + video_id, 'getPlaylistByMediaId', + 'getMobilePlaylistByMediaId', 'properties', + smuggled_data.get('source_url')) return self._extract_info( pc['playlistItems'][0].get('streams', []), @@ -247,11 +254,13 @@ class LimelightChannelIE(LimelightBaseIE): _API_PATH = 'channels' def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) channel_id = self._match_id(url) pc, mobile, medias = self._extract( channel_id, 'getPlaylistByChannelId', - 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', 'media') + 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', + 'media', smuggled_data.get('source_url')) entries = [ self._extract_info( From 89c6691f9d130ec63552a6ece4743caa572fc962 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 13 Feb 2017 15:08:48 +0100 Subject: [PATCH 0242/1696] [bellmedia] accept longer video id(closes #12114) --- youtube_dl/extractor/bellmedia.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bellmedia.py b/youtube_dl/extractor/bellmedia.py index 32326ed9e..1f5b6ed92 100644 --- a/youtube_dl/extractor/bellmedia.py +++ b/youtube_dl/extractor/bellmedia.py @@ -24,7 +24,7 @@ class BellMediaIE(InfoExtractor): space )\.ca| much\.com - )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P[0-9]{6})''' + )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P[0-9]{6,})''' _TESTS = [{ 'url': 'http://www.ctv.ca/video/player?vid=706966', 'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0', @@ -55,6 +55,9 @@ class 
BellMediaIE(InfoExtractor): }, { 'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6', 'only_matching': True, + }, { + 'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430', + 'only_matching': True, }] _DOMAINS = { 'thecomedynetwork': 'comedy', From 6e5956e6ba32c5e4d186e79fbaff0842818ae56b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 13 Feb 2017 23:17:48 +0700 Subject: [PATCH 0243/1696] [lemonde] Fallback delegate extraction to generic extractor (closes #12115, closes #12116) --- youtube_dl/extractor/lemonde.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/lemonde.py b/youtube_dl/extractor/lemonde.py index 42568f315..3306892e8 100644 --- a/youtube_dl/extractor/lemonde.py +++ b/youtube_dl/extractor/lemonde.py @@ -7,20 +7,40 @@ class LemondeIE(InfoExtractor): _VALID_URL = r'https?://(?:.+?\.)?lemonde\.fr/(?:[^/]+/)*(?P[^/]+)\.html' _TESTS = [{ 'url': 'http://www.lemonde.fr/police-justice/video/2016/01/19/comprendre-l-affaire-bygmalion-en-cinq-minutes_4849702_1653578.html', - 'md5': '01fb3c92de4c12c573343d63e163d302', + 'md5': 'da120c8722d8632eec6ced937536cc98', 'info_dict': { 'id': 'lqm3kl', 'ext': 'mp4', 'title': "Comprendre l'affaire Bygmalion en 5 minutes", 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 320, + 'duration': 309, 'upload_date': '20160119', 'timestamp': 1453194778, 'uploader_id': '3pmkp', }, + }, { + # standard iframe embed + 'url': 'http://www.lemonde.fr/les-decodeurs/article/2016/10/18/tout-comprendre-du-ceta-le-petit-cousin-du-traite-transatlantique_5015920_4355770.html', + 'info_dict': { + 'id': 'uzsxms', + 'ext': 'mp4', + 'title': "CETA : quelles suites pour l'accord commercial entre l'Europe et le Canada ?", + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 325, + 'upload_date': '20161021', + 'timestamp': 1477044540, + 'uploader_id': '3pmkp', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://redaction.actu.lemonde.fr/societe/video/2016/01/18/calais-debut-des-travaux-de-defrichement-dans-la-jungle_4849233_3224.html', 'only_matching': True, + }, { + # YouTube embeds + 'url': 'http://www.lemonde.fr/pixels/article/2016/12/09/pourquoi-pewdiepie-superstar-de-youtube-a-menace-de-fermer-sa-chaine_5046649_4408996.html', + 'only_matching': True, }] def _real_extract(self, url): @@ -30,5 +50,9 @@ class LemondeIE(InfoExtractor): digiteka_url = self._proto_relative_url(self._search_regex( r'url\s*:\s*(["\'])(?P(?:https?://)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/.+?)\1', - webpage, 'digiteka url', group='url')) - return self.url_result(digiteka_url, 'Digiteka') + webpage, 'digiteka url', group='url', default=None)) + + if digiteka_url: + return self.url_result(digiteka_url, 'Digiteka') + + return self.url_result(url, 'Generic') From f6d6ca1db3020e7c7771880d0c4b58fdf732a8d5 Mon Sep 17 00:00:00 2001 From: Vobe Date: Sat, 11 Feb 2017 21:11:55 +0100 Subject: [PATCH 0244/1696] [xtube] Improve title extraction --- youtube_dl/extractor/xtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 11717fe98..ed3a37649 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -53,7 +53,7 @@ class XTubeIE(InfoExtractor): if not display_id: display_id = video_id - url = 'http://www.xtube.com/video-watch/-%s' % video_id + url = 'http://www.xtube.com/watch.php?v=%s' % video_id req = 
sanitized_Request(url) req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1') @@ -73,7 +73,7 @@ class XTubeIE(InfoExtractor): self._sort_formats(formats) title = self._search_regex( - (r'

(?P[^<]+)</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'), + (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'), webpage, 'title', group='title') description = self._search_regex( r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False) From 085f169ffebc17ec8b2bfc63aec8f5df57c7bdcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Feb 2017 23:44:43 +0700 Subject: [PATCH 0245/1696] [xtube] Fix extraction for both kinds of video id (closes #12088) --- youtube_dl/extractor/xtube.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index ed3a37649..5584674a0 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -44,6 +44,9 @@ class XTubeIE(InfoExtractor): }, { 'url': 'xtube:625837', 'only_matching': True, + }, { + 'url': 'xtube:kVTUy_G222_', + 'only_matching': True, }] def _real_extract(self, url): @@ -53,11 +56,16 @@ class XTubeIE(InfoExtractor): if not display_id: display_id = video_id - url = 'http://www.xtube.com/watch.php?v=%s' % video_id - req = sanitized_Request(url) - req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1') - webpage = self._download_webpage(req, display_id) + if video_id.isdigit() and len(video_id) < 11: + url_pattern = 'http://www.xtube.com/video-watch/-%s' + else: + url_pattern = 'http://www.xtube.com/watch.php?v=%s' + + webpage = self._download_webpage( + url_pattern % video_id, display_id, headers={ + 'Cookie': 'age_verified=1; cookiesAccepted=1', + }) sources = self._parse_json(self._search_regex( r'(["\'])sources\1\s*:\s*(?P<sources>{.+?}),', From 50de3dbad39d0b8cc1529113894f146f6f3f24b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Feb 2017 01:00:06 +0700 Subject: [PATCH 0246/1696] [zdf] Fix extraction (closes #12117) --- youtube_dl/extractor/zdf.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index a365923fb..523bb5c95 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -20,9 +20,9 @@ from ..utils import ( class ZDFBaseIE(InfoExtractor): - def _call_api(self, url, player, referrer, video_id): + def _call_api(self, url, player, referrer, video_id, item): return self._download_json( - url, video_id, 'Downloading JSON content', + url, video_id, 'Downloading JSON %s' % item, headers={ 'Referer': referrer, 'Api-Auth': 'Bearer %s' % player['apiToken'], @@ -104,7 +104,7 @@ class ZDFIE(ZDFBaseIE): }) formats.append(f) - def _extract_entry(self, url, content, video_id): + def _extract_entry(self, url, player, content, video_id): title = content.get('title') or content['teaserHeadline'] t = content['mainVideoContent']['http://zdf.de/rels/target'] @@ -116,7 +116,8 @@ class ZDFIE(ZDFBaseIE): 'http://zdf.de/rels/streams/ptmd-template'].replace( '{playerId}', 'portal') - ptmd = self._download_json(urljoin(url, ptmd_path), video_id) + ptmd = self._call_api( + urljoin(url, ptmd_path), player, url, video_id, 'metadata') formats = [] track_uris = set() @@ -174,8 +175,9 @@ class ZDFIE(ZDFBaseIE): } def _extract_regular(self, url, player, video_id): - content = self._call_api(player['content'], player, url, video_id) - return self._extract_entry(player['content'], content, video_id) + content = self._call_api( + player['content'], player, url, video_id, 'content') + return 
self._extract_entry(player['content'], player, content, video_id) def _extract_mobile(self, video_id): document = self._download_json( From cedf08ff54d192a0e32ecb3b943f50299cda7ea2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Feb 2017 01:07:35 +0700 Subject: [PATCH 0247/1696] [ChangeLog] Actualize --- ChangeLog | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ChangeLog b/ChangeLog index 089449dfb..d651f8880 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,7 +4,17 @@ Core * TypeError is fixed with Python 2.7.13 on Windows (#11540, #12085) Extractor +* [zdf] Fix extraction (#12117) +* [xtube] Fix extraction for both kinds of video id (#12088) +* [xtube] Improve title extraction (#12088) ++ [lemonde] Fallback delegate extraction to generic extractor (#12115, #12116) +* [bellmedia] Allow video id longer than 6 characters (#12114) ++ [limelight] Add support for referer protected videos +* [disney] Improve extraction (#4975, #11000, #11882, #11936) +* [hotstar] Improve extraction (#12096) * [einthusan] Fix extraction (#11416) ++ [aenetworks] Add support for lifetimemovieclub.com (#12097) +* [youtube] Fix parsing codecs (#12091) version 2017.02.11 From 58a65ba852443075fe38a3ef74798de05dd57bda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Feb 2017 01:09:18 +0700 Subject: [PATCH 0248/1696] release 2017.02.14 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7bd301cc8..32aa55d83 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.11** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.14** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.11 +[debug] youtube-dl version 2017.02.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index d651f8880..9242b3eee 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.14 Core * TypeError is fixed with Python 2.7.13 on Windows (#11540, #12085) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 1f84acfea..3e7e7c0bf 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.11' +__version__ = '2017.02.14' From fcca0d53a8fa47614a39a433a3da7d1ab1d88ed9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Caletka?= <ondrej@caletka.cz> Date: Tue, 14 Feb 2017 15:57:17 +0100 Subject: [PATCH 0249/1696] [ceskatelevize] Quick fix to revert to using old HLS-based playlist This fixes recent changes in iVysilani. Proper patch should migrate to MPEG-DASH version, which is now the default. --- youtube_dl/extractor/ceskatelevize.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 4f88c31ad..0f1453b99 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -21,10 +21,10 @@ class CeskaTelevizeIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', 'info_dict': { - 'id': '61924494876951776', + 'id': '61924494877246241', 'ext': 'mp4', - 'title': 'Hyde Park Civilizace', - 'description': 'md5:fe93f6eda372d150759d11644ebbfb4a', + 'title': 'Hyde Park Civilizace: Život v Grónsku', + 'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626', 'thumbnail': r're:^https?://.*\.jpg', 'duration': 3350, }, @@ -121,6 +121,7 @@ class CeskaTelevizeIE(InfoExtractor): req.add_header('Content-type', 'application/x-www-form-urlencoded') req.add_header('x-addr', '127.0.0.1') req.add_header('X-Requested-With', 'XMLHttpRequest') + req.add_header('User-agent', 'Mozilla/5.0') req.add_header('Referer', url) playlistpage = self._download_json(req, playlist_id) From 5cb2d36c82abf3b753910afe3013b274e31a247a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Feb 2017 22:56:39 +0700 Subject: [PATCH 0250/1696] [ceskatelevize] Extract DASH formats (closes #12119, closes #12133) --- youtube_dl/extractor/ceskatelevize.py | 152 +++++++++++++++----------- 1 file changed, 88 insertions(+), 64 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 0f1453b99..e08bf264c 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -13,6 +13,7 @@ from ..utils import ( float_or_none, 
sanitized_Request, urlencode_postdata, + USER_AGENTS, ) @@ -114,71 +115,94 @@ class CeskaTelevizeIE(InfoExtractor): 'requestSource': 'iVysilani', } - req = sanitized_Request( - 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', - data=urlencode_postdata(data)) - - req.add_header('Content-type', 'application/x-www-form-urlencoded') - req.add_header('x-addr', '127.0.0.1') - req.add_header('X-Requested-With', 'XMLHttpRequest') - req.add_header('User-agent', 'Mozilla/5.0') - req.add_header('Referer', url) - - playlistpage = self._download_json(req, playlist_id) - - playlist_url = playlistpage['url'] - if playlist_url == 'error_region': - raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) - - req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) - req.add_header('Referer', url) - - playlist_title = self._og_search_title(webpage, default=None) - playlist_description = self._og_search_description(webpage, default=None) - - playlist = self._download_json(req, playlist_id)['playlist'] - playlist_len = len(playlist) - entries = [] - for item in playlist: - is_live = item.get('type') == 'LIVE' - formats = [] - for format_id, stream_url in item['streamUrls'].items(): - formats.extend(self._extract_m3u8_formats( - stream_url, playlist_id, 'mp4', - entry_protocol='m3u8' if is_live else 'm3u8_native', - fatal=False)) - self._sort_formats(formats) - - item_id = item.get('id') or item['assetId'] - title = item['title'] - - duration = float_or_none(item.get('duration')) - thumbnail = item.get('previewImageUrl') - - subtitles = {} - if item.get('type') == 'VOD': - subs = item.get('subtitles') - if subs: - subtitles = self.extract_subtitles(episode_id, subs) - - if playlist_len == 1: - final_title = playlist_title or title - if is_live: - final_title = self._live_title(final_title) - else: - final_title = '%s (%s)' % (playlist_title, title) - - entries.append({ - 'id': item_id, - 'title': final_title, - 'description': playlist_description if playlist_len == 1 else None, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles, - 'is_live': is_live, - }) + + for user_agent in (None, USER_AGENTS['Safari']): + req = sanitized_Request( + 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', + data=urlencode_postdata(data)) + + req.add_header('Content-type', 'application/x-www-form-urlencoded') + req.add_header('x-addr', '127.0.0.1') + req.add_header('X-Requested-With', 'XMLHttpRequest') + if user_agent: + req.add_header('User-Agent', user_agent) + req.add_header('Referer', url) + + playlistpage = self._download_json(req, playlist_id, fatal=False) + + if not playlistpage: + continue + + playlist_url = playlistpage['url'] + if playlist_url == 'error_region': + raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) + + req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) + req.add_header('Referer', url) + + playlist_title = self._og_search_title(webpage, default=None) + playlist_description = self._og_search_description(webpage, default=None) + + playlist = self._download_json(req, playlist_id, fatal=False) + if not playlist: + continue + + playlist = playlist.get('playlist') + if not isinstance(playlist, list): + continue + + playlist_len = len(playlist) + + for num, item in enumerate(playlist): + is_live = item.get('type') == 'LIVE' + formats = [] + for format_id, stream_url in item.get('streamUrls', {}).items(): + if 'playerType=flash' in stream_url: + formats.extend(self._extract_m3u8_formats( + stream_url, 
playlist_id, 'mp4', + entry_protocol='m3u8' if is_live else 'm3u8_native', + fatal=False)) + else: + formats.extend(self._extract_mpd_formats( + stream_url, playlist_id, fatal=False)) + + if user_agent and len(entries) == playlist_len: + entries[num]['formats'].extend(formats) + continue + + item_id = item.get('id') or item['assetId'] + title = item['title'] + + duration = float_or_none(item.get('duration')) + thumbnail = item.get('previewImageUrl') + + subtitles = {} + if item.get('type') == 'VOD': + subs = item.get('subtitles') + if subs: + subtitles = self.extract_subtitles(episode_id, subs) + + if playlist_len == 1: + final_title = playlist_title or title + if is_live: + final_title = self._live_title(final_title) + else: + final_title = '%s (%s)' % (playlist_title, title) + + entries.append({ + 'id': item_id, + 'title': final_title, + 'description': playlist_description if playlist_len == 1 else None, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + 'subtitles': subtitles, + 'is_live': is_live, + }) + + for e in entries: + self._sort_formats(e['formats']) return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) From 9a372f14b422de15acf91e25a90375688b2ba3fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Feb 2017 23:52:41 +0700 Subject: [PATCH 0251/1696] [pornhub] Extract video URL from tv platform site (#12007, #12129) --- youtube_dl/extractor/pornhub.py | 44 ++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 818d99c1f..7a2737032 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -2,27 +2,27 @@ from __future__ import unicode_literals import itertools -import os +# import os import re from .common import InfoExtractor from ..compat import ( compat_HTTPError, - compat_urllib_parse_unquote, - compat_urllib_parse_unquote_plus, - compat_urllib_parse_urlparse, + # compat_urllib_parse_unquote, + # compat_urllib_parse_unquote_plus, + # compat_urllib_parse_urlparse, ) from ..utils import ( ExtractorError, int_or_none, js_to_json, orderedSet, - sanitized_Request, + # sanitized_Request, str_to_int, ) -from ..aes import ( - aes_decrypt_text -) +# from ..aes import ( +# aes_decrypt_text +# ) class PornHubIE(InfoExtractor): @@ -109,10 +109,14 @@ class PornHubIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - req = sanitized_Request( - 'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id) - req.add_header('Cookie', 'age_verified=1') - webpage = self._download_webpage(req, video_id) + def dl_webpage(platform): + return self._download_webpage( + 'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id, + video_id, headers={ + 'Cookie': 'age_verified=1; platform=%s' % platform, + }) + + webpage = dl_webpage('pc') error_msg = self._html_search_regex( r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>', @@ -123,10 +127,19 @@ class PornHubIE(InfoExtractor): 'PornHub said: %s' % error_msg, expected=True, video_id=video_id) + tv_webpage = dl_webpage('tv') + + video_url = self._search_regex( + r'<video[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//.+?)\1', tv_webpage, + 'video url', group='url') + + title = self._search_regex( + r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None) + # video_title from flashvars contains whitespace instead of non-ASCII 
(see # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying # on that anymore. - title = self._html_search_meta( + title = title or self._html_search_meta( 'twitter:title', webpage, default=None) or self._search_regex( (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)', r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1', @@ -156,6 +169,7 @@ class PornHubIE(InfoExtractor): comment_count = self._extract_count( r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') + """ video_variables = {} for video_variablename, quote, video_variable in re.findall( r'(player_quality_[0-9]{3,4}p\w+)\s*=\s*(["\'])(.+?)\2;', webpage): @@ -197,6 +211,7 @@ class PornHubIE(InfoExtractor): 'height': height, }) self._sort_formats(formats) + """ page_params = self._parse_json(self._search_regex( r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})', @@ -209,6 +224,7 @@ class PornHubIE(InfoExtractor): return { 'id': video_id, + 'url': video_url, 'uploader': video_uploader, 'title': title, 'thumbnail': thumbnail, @@ -217,7 +233,7 @@ class PornHubIE(InfoExtractor): 'like_count': like_count, 'dislike_count': dislike_count, 'comment_count': comment_count, - 'formats': formats, + # 'formats': formats, 'age_limit': 18, 'tags': tags, 'categories': categories, From 22ce9ad2bdad2bf79b22f82cfff7f58156c9d349 Mon Sep 17 00:00:00 2001 From: Marek Rusinowski <marekrusinowski@gmail.com> Date: Mon, 13 Feb 2017 21:42:26 +0100 Subject: [PATCH 0252/1696] [vod.pl] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vodpl.py | 36 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 youtube_dl/extractor/vodpl.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 76ad7c40b..657e45e6f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1147,6 +1147,7 @@ from .vlive import ( VLiveChannelIE ) from .vodlocker import VodlockerIE +from .vodpl import VODPlIE from .vodplatform import VODPlatformIE from .voicerepublic import VoiceRepublicIE from .voxmedia import VoxMediaIE diff --git a/youtube_dl/extractor/vodpl.py b/youtube_dl/extractor/vodpl.py new file mode 100644 index 000000000..f612347ce --- /dev/null +++ b/youtube_dl/extractor/vodpl.py @@ -0,0 +1,36 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .onet import OnetBaseIE +from ..utils import clean_html + + +class VODPlIE(OnetBaseIE): + _VALID_URL = r'https?://vod\.pl/(?:.*/)?(?P<id>[0-9a-zA-Z]+)' + + _TEST = { + 'url': 'https://vod.pl/filmy/chlopaki-nie-placza/3ep3jns', + 'md5': 'a7dc3b2f7faa2421aefb0ecaabf7ec74', + 'info_dict': { + 'id': '3ep3jns', + 'ext': 'mp4', + 'title': 'Chłopaki nie płaczą', + 'description': 'Kuba Brenner aby pomóc swojemu nieśmiałemu przyjacielowi Oskarowi wynajmuje w agencji towarzyskiej dwie panie. Po upojnej nocy okazuje się, że chłopcy nie byli przygotowani finansowo. "Opiekun artystyczny" dziewczyn zabiera w ramach rekompensaty drogocenną rzeźbę należącą do wujka Oskara. Kłopoty chłopców zaczynają się, gdy Kuba udaje się do agencji aby wykupić figurkę i trafia w sam środek mafijnej transakcji... Idiotyczny przypadek sprawia, że w klubie dochodzi do strzelaniny podczas której Grucha i Bolec zostają ranni, ginie również walizka z pieniędzmi... 
Podejrzenie pada na Kubę.', + 'timestamp': 1463415154, + 'duration': 5765, + 'upload_date': '20160516', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + mvp_id = self._search_mvp_id(webpage) + + info_dict = self._extract_from_id(mvp_id, webpage) + info_dict.update({ + 'id': video_id, + 'description': clean_html(info_dict['description']).strip().replace('\r', '\n') + }) + + return info_dict From 6092ccd05844976ea946ba5277f2b00ccb5c7920 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 00:52:31 +0700 Subject: [PATCH 0253/1696] [vodpl] Make more robust and add another test (closes #12122) --- youtube_dl/extractor/vodpl.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/vodpl.py b/youtube_dl/extractor/vodpl.py index f612347ce..9e919708e 100644 --- a/youtube_dl/extractor/vodpl.py +++ b/youtube_dl/extractor/vodpl.py @@ -2,35 +2,31 @@ from __future__ import unicode_literals from .onet import OnetBaseIE -from ..utils import clean_html class VODPlIE(OnetBaseIE): - _VALID_URL = r'https?://vod\.pl/(?:.*/)?(?P<id>[0-9a-zA-Z]+)' + _VALID_URL = r'https?://vod\.pl/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)' - _TEST = { + _TESTS = [{ 'url': 'https://vod.pl/filmy/chlopaki-nie-placza/3ep3jns', 'md5': 'a7dc3b2f7faa2421aefb0ecaabf7ec74', 'info_dict': { 'id': '3ep3jns', 'ext': 'mp4', 'title': 'Chłopaki nie płaczą', - 'description': 'Kuba Brenner aby pomóc swojemu nieśmiałemu przyjacielowi Oskarowi wynajmuje w agencji towarzyskiej dwie panie. Po upojnej nocy okazuje się, że chłopcy nie byli przygotowani finansowo. "Opiekun artystyczny" dziewczyn zabiera w ramach rekompensaty drogocenną rzeźbę należącą do wujka Oskara. Kłopoty chłopców zaczynają się, gdy Kuba udaje się do agencji aby wykupić figurkę i trafia w sam środek mafijnej transakcji... Idiotyczny przypadek sprawia, że w klubie dochodzi do strzelaniny podczas której Grucha i Bolec zostają ranni, ginie również walizka z pieniędzmi... 
Podejrzenie pada na Kubę.', + 'description': 'md5:f5f03b84712e55f5ac9f0a3f94445224', 'timestamp': 1463415154, 'duration': 5765, 'upload_date': '20160516', }, - } + }, { + 'url': 'https://vod.pl/seriale/belfer-na-planie-praca-kamery-online/2c10heh', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - mvp_id = self._search_mvp_id(webpage) - - info_dict = self._extract_from_id(mvp_id, webpage) - info_dict.update({ - 'id': video_id, - 'description': clean_html(info_dict['description']).strip().replace('\r', '\n') - }) - + info_dict = self._extract_from_id(self._search_mvp_id(webpage), webpage) + info_dict['id'] = video_id return info_dict From d31aa74fdb3f69071ba869feba03525f67e974f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 00:58:18 +0700 Subject: [PATCH 0254/1696] [onetmvp] Add shortcut extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/onet.py | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 657e45e6f..b2ee0c1b0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -694,6 +694,7 @@ from .ondemandkorea import OnDemandKoreaIE from .onet import ( OnetIE, OnetChannelIE, + OnetMVPIE, ) from .onionstudios import OnionStudiosIE from .ooyala import ( diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py index 0a501b3e5..46bad492a 100644 --- a/youtube_dl/extractor/onet.py +++ b/youtube_dl/extractor/onet.py @@ -23,7 +23,7 @@ class OnetBaseIE(InfoExtractor): return self._search_regex( r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id') - def _extract_from_id(self, video_id, webpage): + def _extract_from_id(self, video_id, webpage=None): response = self._download_json( 'http://qi.ckm.onetapi.pl/', video_id, query={ @@ -74,8 +74,10 @@ class OnetBaseIE(InfoExtractor): meta = video.get('meta', {}) - title = self._og_search_title(webpage, default=None) or meta['title'] - description = self._og_search_description(webpage, default=None) or meta.get('description') + title = (self._og_search_title( + webpage, default=None) if webpage else None) or meta['title'] + description = (self._og_search_description( + webpage, default=None) if webpage else None) or meta.get('description') duration = meta.get('length') or meta.get('lenght') timestamp = parse_iso8601(meta.get('addDate'), ' ') @@ -89,6 +91,18 @@ class OnetBaseIE(InfoExtractor): } +class OnetMVPIE(OnetBaseIE): + _VALID_URL = r'onetmvp:(?P<id>\d+\.\d+)' + + _TEST = { + 'url': 'onetmvp:381027.1509591944', + 'only_matching': True, + } + + def _real_extract(self, url): + return self._extract_from_id(self._match_id(url)) + + class OnetIE(OnetBaseIE): _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)' IE_NAME = 'onet.tv' From 43a3d9edfcdad8eb33758c4a7f4f912322001b8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 01:14:06 +0700 Subject: [PATCH 0255/1696] [onetpl] Add support for onet.pl (closes #10507) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/onet.py | 32 ++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b2ee0c1b0..be3688d5a 100644 --- a/youtube_dl/extractor/extractors.py 
+++ b/youtube_dl/extractor/extractors.py @@ -695,6 +695,7 @@ from .onet import ( OnetIE, OnetChannelIE, OnetMVPIE, + OnetPlIE, ) from .onionstudios import OnionStudiosIE from .ooyala import ( diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py index 46bad492a..801aadbff 100644 --- a/youtube_dl/extractor/onet.py +++ b/youtube_dl/extractor/onet.py @@ -181,3 +181,35 @@ class OnetChannelIE(OnetBaseIE): channel_title = strip_or_none(get_element_by_class('o_channelName', webpage)) channel_description = strip_or_none(get_element_by_class('o_channelDesc', webpage)) return self.playlist_result(entries, channel_id, channel_title, channel_description) + + +class OnetPlIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^/]+\.)?onet\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)' + IE_NAME = 'onet.pl' + + _TESTS = [{ + 'url': 'http://eurosport.onet.pl/zimowe/skoki-narciarskie/ziobro-wygral-kwalifikacje-w-pjongczangu/9ckrly', + 'md5': 'b94021eb56214c3969380388b6e73cb0', + 'info_dict': { + 'id': '1561707.1685479', + 'ext': 'mp4', + 'title': 'Ziobro wygrał kwalifikacje w Pjongczangu', + 'description': 'md5:61fb0740084d2d702ea96512a03585b4', + 'upload_date': '20170214', + 'timestamp': 1487078046, + }, + }, { + 'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + mvp_id = self._search_regex( + r'data-params-mvp=["\'](\d+\.\d+)', webpage, 'mvp id') + + return self.url_result( + 'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id) From 04a741232f8e03cc91a3539066c66aed802076b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 01:23:55 +0700 Subject: [PATCH 0256/1696] [onetpl] Add support for businessinsider.com.pl and plejada.pl --- youtube_dl/extractor/onet.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py index 801aadbff..94f57990b 100644 --- a/youtube_dl/extractor/onet.py +++ b/youtube_dl/extractor/onet.py @@ -184,7 +184,7 @@ class OnetChannelIE(OnetBaseIE): class OnetPlIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+\.)?onet\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)' + _VALID_URL = r'https?://(?:[^/]+\.)?(?:onet|businessinsider\.com|plejada)\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)' IE_NAME = 'onet.pl' _TESTS = [{ @@ -201,6 +201,15 @@ class OnetPlIE(InfoExtractor): }, { 'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3', 'only_matching': True, + }, { + 'url': 'http://moto.onet.pl/jak-wybierane-sa-miejsca-na-fotoradary/6rs04e', + 'only_matching': True, + }, { + 'url': 'http://businessinsider.com.pl/wideo/scenariusz-na-koniec-swiata-wedlug-nasa/dwnqptk', + 'only_matching': True, + }, { + 'url': 'http://plejada.pl/weronika-rosati-o-swoim-domniemanym-slubie/n2bq89', + 'only_matching': True, }] def _real_extract(self, url): From 3021cf83b7cd45283fd1a72859e46f44e67ce7bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 02:08:32 +0700 Subject: [PATCH 0257/1696] [pinkbike] Fix uploader extraction (closes #12054) --- youtube_dl/extractor/pinkbike.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pinkbike.py b/youtube_dl/extractor/pinkbike.py index 6a4580d54..9f3501f77 100644 --- a/youtube_dl/extractor/pinkbike.py +++ b/youtube_dl/extractor/pinkbike.py @@ -64,7 
+64,8 @@ class PinkbikeIE(InfoExtractor): 'video:duration', webpage, 'duration')) uploader = self._search_regex( - r'un:\s*"([^"]+)"', webpage, 'uploader', fatal=False) + r'<a[^>]+\brel=["\']author[^>]+>([^<]+)', webpage, + 'uploader', fatal=False) upload_date = unified_strdate(self._search_regex( r'class="fullTime"[^>]+title="([^"]+)"', webpage, 'upload date', fatal=False)) From 1bd05345ea4b91598ec04b8e0d33fd14f9e2eddc Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 15 Feb 2017 14:18:50 +0100 Subject: [PATCH 0258/1696] [amcnetworks] fix extraction(closes #12127) --- youtube_dl/extractor/amcnetworks.py | 30 ++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py index 87c803e94..b71d1a093 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -53,20 +53,30 @@ class AMCNetworksIE(ThePlatformIE): 'mbr': 'true', 'manifest': 'm3u', } - media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url') + media_url = self._search_regex( + r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', + webpage, 'media url') theplatform_metadata = self._download_theplatform_metadata(self._search_regex( - r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), display_id) + r'link\.theplatform\.com/s/([^?]+)', + media_url, 'theplatform_path'), display_id) info = self._parse_theplatform_metadata(theplatform_metadata) video_id = theplatform_metadata['pid'] title = theplatform_metadata['title'] rating = theplatform_metadata['ratings'][0]['rating'] - auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required') + auth_required = self._search_regex( + r'window\.authRequired\s*=\s*(true|false);', + webpage, 'auth required') if auth_required == 'true': - requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id') - resource = self._get_mvpd_resource(requestor_id, title, video_id, rating) - query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource) + requestor_id = self._search_regex( + r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', + webpage, 'requestor id') + resource = self._get_mvpd_resource( + requestor_id, title, video_id, rating) + query['auth'] = self._extract_mvpd_auth( + url, video_id, requestor_id, resource) media_url = update_url_query(media_url, query) - formats, subtitles = self._extract_theplatform_smil(media_url, video_id) + formats, subtitles = self._extract_theplatform_smil( + media_url, video_id) self._sort_formats(formats) info.update({ 'id': video_id, @@ -78,9 +88,11 @@ class AMCNetworksIE(ThePlatformIE): if ns_keys: ns = list(ns_keys)[0] series = theplatform_metadata.get(ns + '$show') - season_number = int_or_none(theplatform_metadata.get(ns + '$season')) + season_number = int_or_none( + theplatform_metadata.get(ns + '$season')) episode = theplatform_metadata.get(ns + '$episodeTitle') - episode_number = int_or_none(theplatform_metadata.get(ns + '$episode')) + episode_number = int_or_none( + theplatform_metadata.get(ns + '$episode')) if season_number: title = 'Season %d - %s' % (season_number, title) if series: From db13c16ef8968613680e2bbc85f373c3e74faf98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 23:12:10 +0700 Subject: [PATCH 0259/1696] [utils] Add support for quoted string literals in 
--match-filter (closes #8050, closes #12142, closes #12144) --- test/test_YoutubeDL.py | 24 ++++++++++++++++++++++++ youtube_dl/utils.py | 9 +++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 8bf00bea9..d07c35be8 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# coding: utf-8 from __future__ import unicode_literals @@ -606,6 +607,8 @@ class TestYoutubeDL(unittest.TestCase): 'duration': 30, 'filesize': 10 * 1024, 'playlist_id': '42', + 'uploader': "變態妍字幕版 太妍 тест", + 'creator': "тест ' 123 ' тест--", } second = { 'id': '2', @@ -616,6 +619,7 @@ class TestYoutubeDL(unittest.TestCase): 'description': 'foo', 'filesize': 5 * 1024, 'playlist_id': '43', + 'uploader': "тест 123", } videos = [first, second] @@ -656,6 +660,26 @@ class TestYoutubeDL(unittest.TestCase): res = get_videos(f) self.assertEqual(res, ['1']) + f = match_filter_func('uploader = "變態妍字幕版 太妍 тест"') + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func('uploader != "變態妍字幕版 太妍 тест"') + res = get_videos(f) + self.assertEqual(res, ['2']) + + f = match_filter_func('creator = "тест \' 123 \' тест--"') + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func("creator = 'тест \\' 123 \\' тест--'") + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func(r"creator = 'тест \' 123 \' тест--' & duration > 30") + res = get_videos(f) + self.assertEqual(res, []) + def test_playlist_items_selection(self): entries = [{ 'id': compat_str(i), diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1279a9042..07c07be6f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2383,6 +2383,7 @@ def _match_one(filter_part, dct): \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* (?: (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)| + (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)| (?P<strval>(?![0-9.])[a-z0-9A-Z]*) ) \s*$ @@ -2391,7 +2392,8 @@ def _match_one(filter_part, dct): if m: op = COMPARISON_OPERATORS[m.group('op')] actual_value = dct.get(m.group('key')) - if (m.group('strval') is not None or + if (m.group('quotedstrval') is not None or + m.group('strval') is not None or # If the original field is a string and matching comparisonvalue is # a number we should respect the origin of the original field # and process comparison value as a string (see @@ -2401,7 +2403,10 @@ def _match_one(filter_part, dct): if m.group('op') not in ('=', '!='): raise ValueError( 'Operator %s does not support string values!' 
% m.group('op')) - comparison_value = m.group('strval') or m.group('intval') + comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval') + quote = m.group('quote') + if quote is not None: + comparison_value = comparison_value.replace(r'\%s' % quote, quote) else: try: comparison_value = int(m.group('intval')) From 398dea321001b99ac4ad28d3d60a5317c4a439d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 23:20:46 +0700 Subject: [PATCH 0260/1696] [test_YoutubeDL] Fix invalid escape sequences --- test/test_YoutubeDL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index d07c35be8..2cfcf743a 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -541,10 +541,10 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(ydl._format_note({}), '') assertRegexpMatches(self, ydl._format_note({ 'vbr': 10, - }), '^\s*10k$') + }), r'^\s*10k$') assertRegexpMatches(self, ydl._format_note({ 'fps': 30, - }), '^30fps$') + }), r'^30fps$') def test_postprocessors(self): filename = 'post-processor-testfile.mp4' From 099cfdb770f458de7cfdf3e814fbb9f43db217ea Mon Sep 17 00:00:00 2001 From: Anisse Astier <anisse@astier.eu> Date: Wed, 15 Feb 2017 17:28:31 +0100 Subject: [PATCH 0261/1696] [devscripts/run_tests.sh] Change permission for script to 755 --- .travis.yml | 2 -- devscripts/run_tests.sh | 0 2 files changed, 2 deletions(-) mode change 100644 => 100755 devscripts/run_tests.sh diff --git a/.travis.yml b/.travis.yml index 8ba93ec02..f41e11137 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,8 +11,6 @@ sudo: false env: - YTDL_TEST_SET=core - YTDL_TEST_SET=download -before_script: - - chmod +x ./devscripts/run_tests.sh script: ./devscripts/run_tests.sh notifications: email: diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh old mode 100644 new mode 100755 From de4d378c0cd9035d4ab93dc6826a17c76f388641 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Feb 2017 23:38:00 +0700 Subject: [PATCH 0262/1696] [ceskatelevize] Prefix format ids --- youtube_dl/extractor/ceskatelevize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index e08bf264c..1b16e5aaa 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -162,10 +162,10 @@ class CeskaTelevizeIE(InfoExtractor): formats.extend(self._extract_m3u8_formats( stream_url, playlist_id, 'mp4', entry_protocol='m3u8' if is_live else 'm3u8_native', - fatal=False)) + m3u8_id='hls', fatal=False)) else: formats.extend(self._extract_mpd_formats( - stream_url, playlist_id, fatal=False)) + stream_url, playlist_id, mpd_id='dash', fatal=False)) if user_agent and len(entries) == playlist_len: entries[num]['formats'].extend(formats) From eafaeb226a277008fb8df72bf0326f2b369ff6a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Feb 2017 00:04:15 +0700 Subject: [PATCH 0263/1696] [ceskatelevize] Lower priority for audio description sources (#12119) --- youtube_dl/extractor/ceskatelevize.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 1b16e5aaa..b1dfacf80 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -159,13 
+159,19 @@ class CeskaTelevizeIE(InfoExtractor): formats = [] for format_id, stream_url in item.get('streamUrls', {}).items(): if 'playerType=flash' in stream_url: - formats.extend(self._extract_m3u8_formats( + stream_formats = self._extract_m3u8_formats( stream_url, playlist_id, 'mp4', entry_protocol='m3u8' if is_live else 'm3u8_native', - m3u8_id='hls', fatal=False)) + m3u8_id='hls-%s' % format_id, fatal=False) else: - formats.extend(self._extract_mpd_formats( - stream_url, playlist_id, mpd_id='dash', fatal=False)) + stream_formats = self._extract_mpd_formats( + stream_url, playlist_id, + mpd_id='dash-%s' % format_id, fatal=False) + # See https://github.com/rg3/youtube-dl/issues/12119#issuecomment-280037031 + if format_id == 'audioDescription': + for f in stream_formats: + f['source_preference'] = -10 + formats.extend(stream_formats) if user_agent and len(entries) == playlist_len: entries[num]['formats'].extend(formats) From 3aa25395aa02b7a33e0fbf6d38e39fffee268255 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Feb 2017 00:08:56 +0700 Subject: [PATCH 0264/1696] [ChangeLog] Actualize --- ChangeLog | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ChangeLog b/ChangeLog index 9242b3eee..912e1bbdc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +version <unreleased> + +Core ++ [utils] Add support for quoted string literals in --match-filter (#8050, + #12142, #12144) + +Extractors +* [ceskatelevize] Lower priority for audio description sources (#12119) +* [amcnetworks] Fix extraction (#12127) +* [pinkbike] Fix uploader extraction (#12054) ++ [onetpl] Add support for businessinsider.com.pl and plejada.pl ++ [onetpl] Add support for onet.pl (#10507) ++ [onetmvp] Add shortcut extractor ++ [vodpl] Add support for vod.pl (#12122) ++ [pornhub] Extract video URL from tv platform site (#12007, #12129) ++ [ceskatelevize] Extract DASH formats (#12119, #12133) + + version 2017.02.14 Core From 2480b056c137e514662b70053ec2df1391b6c2ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Feb 2017 00:10:04 +0700 Subject: [PATCH 0265/1696] release 2017.02.16 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 32aa55d83..06711f73b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.14** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.14 +[debug] youtube-dl version 2017.02.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 912e1bbdc..8ef8a8307 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.16 Core + [utils] Add support for quoted string literals in --match-filter (#8050, diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 3e84f1237..5a436e8f7 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -546,8 +546,10 @@ - **OktoberfestTV** - **on.aol.com** - **OnDemandKorea** + - **onet.pl** - **onet.tv** - **onet.tv:channel** + - **OnetMVP** - **OnionStudios** - **Ooyala** - **OoyalaExternal** @@ -900,6 +902,7 @@ - **vlive** - **vlive:channel** - **Vodlocker** + - **VODPl** - **VODPlatform** - **VoiceRepublic** - **VoxMedia** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3e7e7c0bf..323e80954 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.14' +__version__ = '2017.02.16' From b898f0a173fa040ddf95dbd97650cec07a8f19f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Feb 2017 04:57:42 +0700 Subject: [PATCH 0266/1696] [elpais] Fix typo and improve extraction (closes #12139) --- youtube_dl/extractor/elpais.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/elpais.py b/youtube_dl/extractor/elpais.py index 99e00cf3c..b89f6db62 100644 --- a/youtube_dl/extractor/elpais.py +++ b/youtube_dl/extractor/elpais.py @@ -39,6 +39,18 @@ class ElPaisIE(InfoExtractor): 'description': 'La nave portaba cientos de ánforas y se hundió cerca de la isla de Cabrera por razones desconocidas', 'upload_date': '20170127', }, + }, { + 'url': 'http://epv.elpais.com/epv/2017/02/14/programa_la_voz_de_inaki/1487062137_075943.html', + 'info_dict': { + 'id': '1487062137_075943', + 'ext': 'mp4', + 'title': 'Disyuntivas', + 'description': 'md5:a0fb1485c4a6a8a917e6f93878e66218', + 'upload_date': '20170214', + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): @@ -59,14 +71,15 @@ class ElPaisIE(InfoExtractor): video_url = prefix + video_suffix thumbnail_suffix = self._search_regex( r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", - webpage, 'thumbnail URL', fatal=False) + webpage, 'thumbnail URL', default=None) thumbnail = ( None if thumbnail_suffix is None - else prefix + thumbnail_suffix) + else prefix + thumbnail_suffix) or self._og_search_thumbnail(webpage) title = self._html_search_regex( - (r"tituloVideo\s*=\s*'([^']+)'", webpage, 'title', - r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'), - webpage, 'title') + 
(r"tituloVideo\s*=\s*'([^']+)'", + r'<h2 class="entry-header entry-title.*?>(.*?)</h2>', + r'<h1[^>]+class="titulo"[^>]*>([^<]+)'), + webpage, 'title', default=None) or self._og_search_title(webpage) upload_date = unified_strdate(self._search_regex( r'<p class="date-header date-int updated"\s+title="([^"]+)">', webpage, 'upload date', default=None) or self._html_search_meta( From a4a554a79354981fcab55de8eaab7b95a40bbb48 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 16 Feb 2017 23:42:36 +0800 Subject: [PATCH 0267/1696] [generic] Try parsing JWPlayer embedded videos (closes #12030) --- ChangeLog | 6 ++ youtube_dl/extractor/archiveorg.py | 4 +- youtube_dl/extractor/common.py | 118 ++++++++++++++++++++ youtube_dl/extractor/generic.py | 20 ++++ youtube_dl/extractor/jwplatform.py | 132 +---------------------- youtube_dl/extractor/ondemandkorea.py | 4 +- youtube_dl/extractor/pornhub.py | 44 -------- youtube_dl/extractor/pornoxo.py | 4 +- youtube_dl/extractor/rentv.py | 3 +- youtube_dl/extractor/rudo.py | 4 +- youtube_dl/extractor/screencastomatic.py | 4 +- youtube_dl/extractor/sendtonews.py | 4 +- youtube_dl/extractor/thisav.py | 4 +- youtube_dl/extractor/tvnoe.py | 4 +- youtube_dl/extractor/vidzi.py | 4 +- youtube_dl/extractor/wimp.py | 4 +- 16 files changed, 166 insertions(+), 197 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8ef8a8307..4e69b03d0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors ++ [generic] Support complex JWPlayer embedded videos (#12030) + + version 2017.02.16 Core diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index 486dff82d..e21045bed 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -1,13 +1,13 @@ from __future__ import unicode_literals -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( unified_strdate, clean_html, ) -class ArchiveOrgIE(JWPlatformBaseIE): +class ArchiveOrgIE(InfoExtractor): IE_NAME = 'archive.org' IE_DESC = 'archive.org videos' _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$' diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9681453ca..f6ff56eda 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -40,6 +40,7 @@ from ..utils import ( fix_xml_ampersands, float_or_none, int_or_none, + js_to_json, parse_iso8601, RegexNotFoundError, sanitize_filename, @@ -2073,6 +2074,123 @@ class InfoExtractor(object): }) return formats + @staticmethod + def _find_jwplayer_data(webpage): + mobj = re.search( + r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)', + webpage) + if mobj: + return mobj.group('options') + + def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): + jwplayer_data = self._parse_json( + self._find_jwplayer_data(webpage), video_id, + transform_source=js_to_json) + return self._parse_jwplayer_data( + jwplayer_data, video_id, *args, **kwargs) + + def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, + m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): + # JWPlayer backward compatibility: flattened playlists + # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 + if 'playlist' not in jwplayer_data: + jwplayer_data = {'playlist': [jwplayer_data]} + + entries = [] + + # JWPlayer backward compatibility: single playlist item + # 
https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10 + if not isinstance(jwplayer_data['playlist'], list): + jwplayer_data['playlist'] = [jwplayer_data['playlist']] + + for video_data in jwplayer_data['playlist']: + # JWPlayer backward compatibility: flattened sources + # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 + if 'sources' not in video_data: + video_data['sources'] = [video_data] + + this_video_id = video_id or video_data['mediaid'] + + formats = [] + for source in video_data['sources']: + source_url = self._proto_relative_url(source['file']) + if base_url: + source_url = compat_urlparse.urljoin(base_url, source_url) + source_type = source.get('type') or '' + ext = mimetype2ext(source_type) or determine_ext(source_url) + if source_type == 'hls' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + source_url, this_video_id, mpd_id=mpd_id, fatal=False)) + # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 + elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): + formats.append({ + 'url': source_url, + 'vcodec': 'none', + 'ext': ext, + }) + else: + height = int_or_none(source.get('height')) + if height is None: + # Often no height is provided but there is a label in + # format like 1080p. + height = int_or_none(self._search_regex( + r'^(\d{3,})[pP]$', source.get('label') or '', + 'height', default=None)) + a_format = { + 'url': source_url, + 'width': int_or_none(source.get('width')), + 'height': height, + 'ext': ext, + } + if source_url.startswith('rtmp'): + a_format['ext'] = 'flv' + + # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as + # of jwplayer.flash.swf + rtmp_url_parts = re.split( + r'((?:mp4|mp3|flv):)', source_url, 1) + if len(rtmp_url_parts) == 3: + rtmp_url, prefix, play_path = rtmp_url_parts + a_format.update({ + 'url': rtmp_url, + 'play_path': prefix + play_path, + }) + if rtmp_params: + a_format.update(rtmp_params) + formats.append(a_format) + self._sort_formats(formats) + + subtitles = {} + tracks = video_data.get('tracks') + if tracks and isinstance(tracks, list): + for track in tracks: + if track.get('kind') != 'captions': + continue + track_url = urljoin(base_url, track.get('file')) + if not track_url: + continue + subtitles.setdefault(track.get('label') or 'en', []).append({ + 'url': self._proto_relative_url(track_url) + }) + + entries.append({ + 'id': this_video_id, + 'title': video_data['title'] if require_title else video_data.get('title'), + 'description': video_data.get('description'), + 'thumbnail': self._proto_relative_url(video_data.get('image')), + 'timestamp': int_or_none(video_data.get('pubdate')), + 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), + 'subtitles': subtitles, + 'formats': formats, + }) + if len(entries) == 1: + return entries[0] + else: + return self.playlist_result(entries) + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a2b0298ec..3db31debe 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -20,6 +20,7 @@ from ..utils import ( float_or_none, HEADRequest, is_html, + js_to_json, orderedSet, sanitized_Request, smuggle_url, @@ -961,6 
+962,16 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, + # Complex jwplayer + { + 'url': 'http://www.indiedb.com/games/king-machine/videos', + 'info_dict': { + 'id': 'videos', + 'ext': 'mp4', + 'title': 'king machine trailer 1', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + }, # rtl.nl embed { 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen', @@ -2488,6 +2499,15 @@ class GenericIE(InfoExtractor): self._sort_formats(entry['formats']) return self.playlist_result(entries) + jwplayer_data_str = self._find_jwplayer_data(webpage) + if jwplayer_data_str: + try: + jwplayer_data = self._parse_json( + jwplayer_data_str, video_id, transform_source=js_to_json) + return self._parse_jwplayer_data(jwplayer_data, video_id) + except ExtractorError: + pass + def check_video(vurl): if YoutubeIE.suitable(vurl): return True diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index aff7ab49a..33d55f770 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -4,139 +4,9 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - determine_ext, - float_or_none, - int_or_none, - js_to_json, - mimetype2ext, - urljoin, -) -class JWPlatformBaseIE(InfoExtractor): - @staticmethod - def _find_jwplayer_data(webpage): - # TODO: Merge this with JWPlayer-related codes in generic.py - - mobj = re.search( - r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)', - webpage) - if mobj: - return mobj.group('options') - - def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): - jwplayer_data = self._parse_json( - self._find_jwplayer_data(webpage), video_id, - transform_source=js_to_json) - return self._parse_jwplayer_data( - jwplayer_data, video_id, *args, **kwargs) - - def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, - m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): - # JWPlayer backward compatibility: flattened playlists - # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 - if 'playlist' not in jwplayer_data: - jwplayer_data = {'playlist': [jwplayer_data]} - - entries = [] - - # JWPlayer backward compatibility: single playlist item - # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10 - if not isinstance(jwplayer_data['playlist'], list): - jwplayer_data['playlist'] = [jwplayer_data['playlist']] - - for video_data in jwplayer_data['playlist']: - # JWPlayer backward compatibility: flattened sources - # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 - if 'sources' not in video_data: - video_data['sources'] = [video_data] - - this_video_id = video_id or video_data['mediaid'] - - formats = [] - for source in video_data['sources']: - source_url = self._proto_relative_url(source['file']) - if base_url: - source_url = compat_urlparse.urljoin(base_url, source_url) - source_type = source.get('type') or '' - ext = mimetype2ext(source_type) or determine_ext(source_url) - if source_type == 'hls' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) - elif ext == 'mpd': - formats.extend(self._extract_mpd_formats( - source_url, this_video_id, mpd_id=mpd_id, fatal=False)) - # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 - elif 
source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): - formats.append({ - 'url': source_url, - 'vcodec': 'none', - 'ext': ext, - }) - else: - height = int_or_none(source.get('height')) - if height is None: - # Often no height is provided but there is a label in - # format like 1080p. - height = int_or_none(self._search_regex( - r'^(\d{3,})[pP]$', source.get('label') or '', - 'height', default=None)) - a_format = { - 'url': source_url, - 'width': int_or_none(source.get('width')), - 'height': height, - 'ext': ext, - } - if source_url.startswith('rtmp'): - a_format['ext'] = 'flv' - - # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as - # of jwplayer.flash.swf - rtmp_url_parts = re.split( - r'((?:mp4|mp3|flv):)', source_url, 1) - if len(rtmp_url_parts) == 3: - rtmp_url, prefix, play_path = rtmp_url_parts - a_format.update({ - 'url': rtmp_url, - 'play_path': prefix + play_path, - }) - if rtmp_params: - a_format.update(rtmp_params) - formats.append(a_format) - self._sort_formats(formats) - - subtitles = {} - tracks = video_data.get('tracks') - if tracks and isinstance(tracks, list): - for track in tracks: - if track.get('kind') != 'captions': - continue - track_url = urljoin(base_url, track.get('file')) - if not track_url: - continue - subtitles.setdefault(track.get('label') or 'en', []).append({ - 'url': self._proto_relative_url(track_url) - }) - - entries.append({ - 'id': this_video_id, - 'title': video_data['title'] if require_title else video_data.get('title'), - 'description': video_data.get('description'), - 'thumbnail': self._proto_relative_url(video_data.get('image')), - 'timestamp': int_or_none(video_data.get('pubdate')), - 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), - 'subtitles': subtitles, - 'formats': formats, - }) - if len(entries) == 1: - return entries[0] - else: - return self.playlist_result(entries) - - -class JWPlatformIE(JWPlatformBaseIE): +class JWPlatformIE(InfoExtractor): _VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})' _TEST = { 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js', diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py index de1d6b08a..dcd157777 100644 --- a/youtube_dl/extractor/ondemandkorea.py +++ b/youtube_dl/extractor/ondemandkorea.py @@ -1,14 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( ExtractorError, js_to_json, ) -class OnDemandKoreaIE(JWPlatformBaseIE): +class OnDemandKoreaIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html' _TEST = { 'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html', diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 7a2737032..9b413590a 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -169,50 +169,6 @@ class PornHubIE(InfoExtractor): comment_count = self._extract_count( r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') - """ - video_variables = {} - for video_variablename, quote, video_variable in re.findall( - r'(player_quality_[0-9]{3,4}p\w+)\s*=\s*(["\'])(.+?)\2;', webpage): - video_variables[video_variablename] = video_variable - - video_urls = [] - for encoded_video_url in re.findall( - r'player_quality_[0-9]{3,4}p\s*=(.+?);', webpage): - for varname, varval in 
video_variables.items(): - encoded_video_url = encoded_video_url.replace(varname, varval) - video_urls.append(re.sub(r'[\s+]', '', encoded_video_url)) - - if webpage.find('"encrypted":true') != -1: - password = compat_urllib_parse_unquote_plus( - self._search_regex(r'"video_title":"([^"]+)', webpage, 'password')) - video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls)) - - formats = [] - for video_url in video_urls: - path = compat_urllib_parse_urlparse(video_url).path - extension = os.path.splitext(path)[1][1:] - format = path.split('/')[5].split('_')[:2] - format = '-'.join(format) - - m = re.match(r'^(?P<height>[0-9]+)[pP]-(?P<tbr>[0-9]+)[kK]$', format) - if m is None: - height = None - tbr = None - else: - height = int(m.group('height')) - tbr = int(m.group('tbr')) - - formats.append({ - 'url': video_url, - 'ext': extension, - 'format': format, - 'format_id': format, - 'tbr': tbr, - 'height': height, - }) - self._sort_formats(formats) - """ - page_params = self._parse_json(self._search_regex( r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})', webpage, 'page parameters', group='data', default='{}'), diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dl/extractor/pornoxo.py index 1a0cce7e0..2831368b6 100644 --- a/youtube_dl/extractor/pornoxo.py +++ b/youtube_dl/extractor/pornoxo.py @@ -2,13 +2,13 @@ from __future__ import unicode_literals import re -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( str_to_int, ) -class PornoXOIE(JWPlatformBaseIE): +class PornoXOIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html' _TEST = { 'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html', diff --git a/youtube_dl/extractor/rentv.py b/youtube_dl/extractor/rentv.py index 422c02cff..d338b3a93 100644 --- a/youtube_dl/extractor/rentv.py +++ b/youtube_dl/extractor/rentv.py @@ -2,11 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .jwplatform import JWPlatformBaseIE from ..compat import compat_str -class RENTVIE(JWPlatformBaseIE): +class RENTVIE(InfoExtractor): _VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)' _TESTS = [{ 'url': 'http://ren.tv/video/epizod/118577', diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py index 3bfe934d8..51644011e 100644 --- a/youtube_dl/extractor/rudo.py +++ b/youtube_dl/extractor/rudo.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( js_to_json, get_element_by_class, @@ -11,7 +11,7 @@ from ..utils import ( ) -class RudoIE(JWPlatformBaseIE): +class RudoIE(InfoExtractor): _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)' _TEST = { diff --git a/youtube_dl/extractor/screencastomatic.py b/youtube_dl/extractor/screencastomatic.py index 94a2a37d2..b5e76c9af 100644 --- a/youtube_dl/extractor/screencastomatic.py +++ b/youtube_dl/extractor/screencastomatic.py @@ -1,11 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import js_to_json -class ScreencastOMaticIE(JWPlatformBaseIE): +class ScreencastOMaticIE(InfoExtractor): _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)' _TEST = { 'url': 
'http://screencast-o-matic.com/watch/c2lD3BeOPl', diff --git a/youtube_dl/extractor/sendtonews.py b/youtube_dl/extractor/sendtonews.py index 9880a5a78..9d9652949 100644 --- a/youtube_dl/extractor/sendtonews.py +++ b/youtube_dl/extractor/sendtonews.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( float_or_none, parse_iso8601, @@ -14,7 +14,7 @@ from ..utils import ( ) -class SendtoNewsIE(JWPlatformBaseIE): +class SendtoNewsIE(InfoExtractor): _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)' _TEST = { diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py index 4473a3c77..b7b3568cb 100644 --- a/youtube_dl/extractor/thisav.py +++ b/youtube_dl/extractor/thisav.py @@ -3,11 +3,11 @@ from __future__ import unicode_literals import re -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import remove_end -class ThisAVIE(JWPlatformBaseIE): +class ThisAVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*' _TESTS = [{ 'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html', diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py index 6d5c74826..1a5b76bf2 100644 --- a/youtube_dl/extractor/tvnoe.py +++ b/youtube_dl/extractor/tvnoe.py @@ -1,7 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( clean_html, get_element_by_class, @@ -9,7 +9,7 @@ from ..utils import ( ) -class TVNoeIE(JWPlatformBaseIE): +class TVNoeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.tvnoe.cz/video/10362', diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py index 9950c62ad..1f1828fce 100644 --- a/youtube_dl/extractor/vidzi.py +++ b/youtube_dl/extractor/vidzi.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re -from .jwplatform import JWPlatformBaseIE +from .common import InfoExtractor from ..utils import ( decode_packed_codes, js_to_json, @@ -12,7 +12,7 @@ from ..utils import ( ) -class VidziIE(JWPlatformBaseIE): +class VidziIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' _TESTS = [{ 'url': 'http://vidzi.tv/cghql9yq6emu.html', diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index 54eb51427..c022fb33e 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -1,10 +1,10 @@ from __future__ import unicode_literals +from .common import InfoExtractor from .youtube import YoutubeIE -from .jwplatform import JWPlatformBaseIE -class WimpIE(JWPlatformBaseIE): +class WimpIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://www.wimp.com/maru-is-exhausted/', From 4cead6a614b5a293e78dce5cd5eda7476f83985d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Feb 2017 22:02:01 +0700 Subject: [PATCH 0268/1696] [einthusan] Relax _VALID_URL (closes #12141, closes #12159) --- youtube_dl/extractor/einthusan.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index 8a2a17b63..3f6268637 100644 --- a/youtube_dl/extractor/einthusan.py +++ 
b/youtube_dl/extractor/einthusan.py @@ -18,8 +18,8 @@ from ..utils import ( class EinthusanIE(InfoExtractor): - _VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[^/?#&]+)' + _TESTS = [{ 'url': 'https://einthusan.tv/movie/watch/9097/', 'md5': 'ff0f7f2065031b8a2cf13a933731c035', 'info_dict': { @@ -29,7 +29,10 @@ class EinthusanIE(InfoExtractor): 'description': 'md5:33ef934c82a671a94652a9b4e54d931b', 'thumbnail': r're:^https?://.*\.jpg$', } - } + }, { + 'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi', + 'only_matching': True, + }] # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js def _decrypt(self, encrypted_data, video_id): From fef51645d6c224f898ff6f44d041a458d21e8547 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 17 Feb 2017 23:13:51 +0800 Subject: [PATCH 0269/1696] [theplatform] Recognize URLs with whitespaces (closes #12044) --- ChangeLog | 1 + youtube_dl/extractor/generic.py | 7 ++++++- youtube_dl/extractor/theplatform.py | 6 ++++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4e69b03d0..d5fe3dd5b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors ++ [theplatform] Recognize URLs with whitespaces (#12044) + [generic] Support complex JWPlayer embedded videos (#12030) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3db31debe..9868ca6d0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1501,7 +1501,12 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, 'add_ie': [VideoPressIE.ie_key()], - } + }, + { + # ThePlatform embedded with whitespaces in URLs + 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm', + 'only_matching': True, + }, # { # # TODO: find another test # # http://schema.org/VideoObject diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 5c5987c6a..9a424b1c6 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -179,10 +179,12 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): if m: return [m.group('url')] + # Are whitesapces ignored in URLs? + # https://github.com/rg3/youtube-dl/issues/12044 matches = re.findall( - r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage) + r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage) if matches: - return list(zip(*matches))[1] + return [re.sub(r'\s', '', list(zip(*matches))[1][0])] @staticmethod def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False): From d94badc755228ee3159b9b499aa718d27fa472ed Mon Sep 17 00:00:00 2001 From: Vijay Singh <sudovijay@users.noreply.github.com> Date: Tue, 7 Feb 2017 10:32:45 +0530 Subject: [PATCH 0270/1696] [openload] Semifix extraction (closes #10408) just updated the code. i don't do much python still i tried to convert my code. 
lemme know if there is any prob with it --- youtube_dl/extractor/openload.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 32289d897..bd1120fd8 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -75,17 +75,20 @@ class OpenloadIE(InfoExtractor): '<span[^>]+id="[^"]+"[^>]*>([0-9]+)</span>', webpage, 'openload ID') - first_three_chars = int(float(ol_id[0:][:3])) - fifth_char = int(float(ol_id[3:5])) - urlcode = '' - num = 5 + first_two_chars = int(float(ol_id[0:][:2])) + urlcode = {} + num = 2 while num < len(ol_id): - urlcode += compat_chr(int(float(ol_id[num:][:3])) + - first_three_chars - fifth_char * int(float(ol_id[num + 3:][:2]))) + key = int(float(ol_id[num + 3:][:2])) + urlcode[key] = compat_chr(int(float(ol_id[num:][:3])) - first_two_chars) num += 5 + + sorted(urlcode, key=lambda key: urlcode[key]) - video_url = 'https://openload.co/stream/' + urlcode + urllink = ''.join(['%s' % (value) for (key, value) in urlcode.items()]) + + video_url = 'https://openload.co/stream/' + urllink title = self._og_search_title(webpage, default=None) or self._search_regex( r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage, From 90fad0e74cd8079246c5f3d8150650b5f65f998b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Feb 2017 22:31:16 +0700 Subject: [PATCH 0271/1696] [openload] Fix extraction (closes #12002) --- youtube_dl/extractor/openload.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index bd1120fd8..10896c442 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -76,19 +76,16 @@ class OpenloadIE(InfoExtractor): webpage, 'openload ID') first_two_chars = int(float(ol_id[0:][:2])) - urlcode = {} + urlcode = [] num = 2 while num < len(ol_id): key = int(float(ol_id[num + 3:][:2])) - urlcode[key] = compat_chr(int(float(ol_id[num:][:3])) - first_two_chars) + urlcode.append((key, compat_chr(int(float(ol_id[num:][:3])) - first_two_chars))) num += 5 - - sorted(urlcode, key=lambda key: urlcode[key]) - urllink = ''.join(['%s' % (value) for (key, value) in urlcode.items()]) - - video_url = 'https://openload.co/stream/' + urllink + video_url = 'https://openload.co/stream/' + ''.join( + [value for _, value in sorted(urlcode, key=lambda x: x[0])]) title = self._og_search_title(webpage, default=None) or self._search_regex( r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage, From c2bde5d08163ce46548ea60333750a0a74a8fe44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= <TRox1972@noreply.github.com> Date: Mon, 9 Jan 2017 18:22:53 +0100 Subject: [PATCH 0272/1696] [ellentv] Improve --- youtube_dl/extractor/ellentv.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index 74bbc5c51..e0a13dd76 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -1,13 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals -import json - from .common import InfoExtractor -from ..utils import ( - ExtractorError, - NO_DEFAULT, -) +from .kaltura import KalturaIE +from ..utils import NO_DEFAULT class EllenTVIE(InfoExtractor): @@ -65,7 +61,7 @@ class EllenTVIE(InfoExtractor): if partner_id and kaltura_id: break - return 
self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura') + return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), KalturaIE.ie_key()) class EllenTVClipsIE(InfoExtractor): @@ -77,14 +73,14 @@ class EllenTVClipsIE(InfoExtractor): 'id': 'meryl-streep-vanessa-hudgens', 'title': 'Meryl Streep, Vanessa Hudgens', }, - 'playlist_mincount': 7, + 'playlist_mincount': 5, } def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) - playlist = self._extract_playlist(webpage) + playlist = self._extract_playlist(webpage, playlist_id) return { '_type': 'playlist', @@ -93,16 +89,13 @@ class EllenTVClipsIE(InfoExtractor): 'entries': self._extract_entries(playlist) } - def _extract_playlist(self, webpage): + def _extract_playlist(self, webpage, playlist_id): json_string = self._search_regex(r'playerView.addClips\(\[\{(.*?)\}\]\);', webpage, 'json') - try: - return json.loads('[{' + json_string + '}]') - except ValueError as ve: - raise ExtractorError('Failed to download JSON', cause=ve) + return self._parse_json('[{' + json_string + '}]', playlist_id) def _extract_entries(self, playlist): return [ self.url_result( 'kaltura:%s:%s' % (item['kaltura_partner_id'], item['kaltura_entry_id']), - 'Kaltura') + KalturaIE.ie_key(), video_id=item['kaltura_entry_id']) for item in playlist] From db76c30c6ecb5d198a72f1807163c9b69771bba1 Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher <tobias-git@23.gs> Date: Tue, 7 Jun 2016 23:42:56 +0200 Subject: [PATCH 0273/1696] [heise] Support videos embedded in any article. --- youtube_dl/extractor/heise.py | 119 +++++++++++++++++++++++++--------- 1 file changed, 90 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 1629cdb8d..a5ec0fae9 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -11,54 +11,115 @@ from ..utils import ( class HeiseIE(InfoExtractor): _VALID_URL = r'''(?x) - https?://(?:www\.)?heise\.de/video/artikel/ - .+?(?P<id>[0-9]+)\.html(?:$|[?#]) + https?://(?:www\.)?heise\.de/.+?(?P<id>[0-9]+)\.html(?:$|[?#]) ''' - _TEST = { - 'url': ( - 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html' - ), - 'md5': 'ffed432483e922e88545ad9f2f15d30e', - 'info_dict': { - 'id': '2404147', - 'ext': 'mp4', - 'title': ( - "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone" + _TESTS = [ + { + 'url': ( + 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html' ), - 'format_id': 'mp4_720p', - 'timestamp': 1411812600, - 'upload_date': '20140927', - 'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.', - 'thumbnail': r're:^https?://.*\.jpe?g$', - } - } + 'md5': 'ffed432483e922e88545ad9f2f15d30e', + 'info_dict': { + 'id': '2404147', + 'ext': 'mp4', + 'title': ( + "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone" + ), + 'format_id': 'mp4_720p', + 'timestamp': 1411812600, + 'upload_date': '20140927', + 'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.', + 'thumbnail': r're:^https?://.*/gallery/$', + } + }, + { + 'url': ( + 
'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html' + ), + 'md5': 'ffed432483e922e88545ad9f2f15d30e', + 'info_dict': { + 'id': '2403911', + 'ext': 'mp4', + 'title': ( + "c't uplink 3.3: Owncloud, Tastaturen, Peilsender Smartphone" + ), + 'format_id': 'mp4_720p', + 'timestamp': 1411803000, + 'upload_date': '20140927', + 'description': "In c't uplink erklären wir in dieser Woche, wie man mit Owncloud die Kontrolle über die eigenen Daten behält. Darüber hinaus erklären wir, dass zur Wahl der richtigen Tastatur mehr gehört, als man denkt und wie Smartphones uns weiter verraten.", + 'thumbnail': r're:^https?://.*/gallery/$', + } + }, + { + 'url': ( + 'http://www.heise.de/newsticker/meldung/c-t-uplink-Owncloud-Tastaturen-Peilsender-Smartphone-2404251.html?wt_mc=rss.ho.beitrag.atom' + ), + 'md5': 'ffed432483e922e88545ad9f2f15d30e', + 'info_dict': { + 'id': '2404251', + 'ext': 'mp4', + 'title': ( + "c't uplink: Owncloud, Tastaturen, Peilsender Smartphone" + ), + 'format_id': 'mp4_720p', + 'timestamp': 1411811400, + 'upload_date': '20140927', + 'description': 'In uplink-Episode 3.3 sprechen wir über Owncloud und wie man sich damit von Cloudanbietern emanzipieren kann. Außerdem erklären wir, woran man alles beim Kauf einer Tastatur denken sollte und was Smartphones nun über uns verraten.', + 'thumbnail': r're:^https?://.*/gallery/$', + } + }, + { + 'url': ( + 'http://www.heise.de/ct/ausgabe/2016-12-Spiele-3214137.html' + ), + 'md5': '0616c9297d9c989f9b2a23b483b408c3', + 'info_dict': { + 'id': '3214137', + 'ext': 'mp4', + 'title': ( + "c\u2019t zockt \u201eGlitchspace\u201c, \u201eThe Mind's Eclipse\u201c und \u201eWindowframe\u201c." + ), + 'format_id': 'mp4_720p', + 'timestamp': 1464011220, + 'upload_date': '20160523', + 'description': "Unsere Spiele-Tipps der Woche: Das Puzzle-Adventure Glitchspace, das Jump&Run-Spiel Windowframe und The Mind's Eclipse", + 'thumbnail': r're:^https?://.*/gallery/$', + } + }, + + ] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) container_id = self._search_regex( - r'<div class="videoplayerjw".*?data-container="([0-9]+)"', + r'<div class="videoplayerjw"[^>]*data-container="([0-9]+)"', webpage, 'container ID') sequenz_id = self._search_regex( - r'<div class="videoplayerjw".*?data-sequenz="([0-9]+)"', + r'<div class="videoplayerjw"[^>]*data-sequenz="([0-9]+)"', webpage, 'sequenz ID') data_url = 'http://www.heise.de/videout/feed?container=%s&sequenz=%s' % (container_id, sequenz_id) doc = self._download_xml(data_url, video_id) info = { 'id': video_id, - 'thumbnail': self._og_search_thumbnail(webpage), + 'thumbnail': doc.find('.//{http://rss.jwpcdn.com/}image').text, 'timestamp': parse_iso8601( - self._html_search_meta('date', webpage)), - 'description': self._og_search_description(webpage), + self._html_search_meta('date', webpage)) } - title = self._html_search_meta('fulltitle', webpage) - if title: - info['title'] = title - else: - info['title'] = self._og_search_title(webpage) + title = self._html_search_meta('fulltitle', webpage, default=None) + if not title or title == "c't": + title = self._search_regex( + r'<div class="videoplayerjw"[^>]*data-title="([^"]+)"', + webpage, 'video title') + info['title'] = title + + desc = self._og_search_description(webpage, default=None) + if not desc: + desc = self._html_search_meta('description', webpage) + info['description'] = desc formats = [] for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'): From 
bad4ccdb5db7c00865d433558ddfcdfdbd499343 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Feb 2017 23:09:40 +0700 Subject: [PATCH 0274/1696] [heise] Improve (closes #9725) --- youtube_dl/extractor/heise.py | 146 +++++++++++----------------------- 1 file changed, 48 insertions(+), 98 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index a5ec0fae9..382f32771 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -6,120 +6,58 @@ from ..utils import ( determine_ext, int_or_none, parse_iso8601, + xpath_text, ) class HeiseIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?://(?:www\.)?heise\.de/.+?(?P<id>[0-9]+)\.html(?:$|[?#]) - ''' - _TESTS = [ - { - 'url': ( - 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html' - ), - 'md5': 'ffed432483e922e88545ad9f2f15d30e', - 'info_dict': { - 'id': '2404147', - 'ext': 'mp4', - 'title': ( - "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone" - ), - 'format_id': 'mp4_720p', - 'timestamp': 1411812600, - 'upload_date': '20140927', - 'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.', - 'thumbnail': r're:^https?://.*/gallery/$', - } - }, - { - 'url': ( - 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html' - ), - 'md5': 'ffed432483e922e88545ad9f2f15d30e', - 'info_dict': { - 'id': '2403911', - 'ext': 'mp4', - 'title': ( - "c't uplink 3.3: Owncloud, Tastaturen, Peilsender Smartphone" - ), - 'format_id': 'mp4_720p', - 'timestamp': 1411803000, - 'upload_date': '20140927', - 'description': "In c't uplink erklären wir in dieser Woche, wie man mit Owncloud die Kontrolle über die eigenen Daten behält. Darüber hinaus erklären wir, dass zur Wahl der richtigen Tastatur mehr gehört, als man denkt und wie Smartphones uns weiter verraten.", - 'thumbnail': r're:^https?://.*/gallery/$', - } - }, - { - 'url': ( - 'http://www.heise.de/newsticker/meldung/c-t-uplink-Owncloud-Tastaturen-Peilsender-Smartphone-2404251.html?wt_mc=rss.ho.beitrag.atom' - ), - 'md5': 'ffed432483e922e88545ad9f2f15d30e', - 'info_dict': { - 'id': '2404251', - 'ext': 'mp4', - 'title': ( - "c't uplink: Owncloud, Tastaturen, Peilsender Smartphone" - ), - 'format_id': 'mp4_720p', - 'timestamp': 1411811400, - 'upload_date': '20140927', - 'description': 'In uplink-Episode 3.3 sprechen wir über Owncloud und wie man sich damit von Cloudanbietern emanzipieren kann. Außerdem erklären wir, woran man alles beim Kauf einer Tastatur denken sollte und was Smartphones nun über uns verraten.', - 'thumbnail': r're:^https?://.*/gallery/$', - } - }, - { - 'url': ( - 'http://www.heise.de/ct/ausgabe/2016-12-Spiele-3214137.html' - ), - 'md5': '0616c9297d9c989f9b2a23b483b408c3', - 'info_dict': { - 'id': '3214137', - 'ext': 'mp4', - 'title': ( - "c\u2019t zockt \u201eGlitchspace\u201c, \u201eThe Mind's Eclipse\u201c und \u201eWindowframe\u201c." 
- ), - 'format_id': 'mp4_720p', - 'timestamp': 1464011220, - 'upload_date': '20160523', - 'description': "Unsere Spiele-Tipps der Woche: Das Puzzle-Adventure Glitchspace, das Jump&Run-Spiel Windowframe und The Mind's Eclipse", - 'thumbnail': r're:^https?://.*/gallery/$', - } - }, - - ] + _VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html' + _TESTS = [{ + 'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html', + 'md5': 'ffed432483e922e88545ad9f2f15d30e', + 'info_dict': { + 'id': '2404147', + 'ext': 'mp4', + 'title': "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone", + 'format_id': 'mp4_720p', + 'timestamp': 1411812600, + 'upload_date': '20140927', + 'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20', + 'thumbnail': r're:^https?://.*/gallery/$', + } + }, { + 'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html', + 'only_matching': True, + }, { + 'url': 'http://www.heise.de/newsticker/meldung/c-t-uplink-Owncloud-Tastaturen-Peilsender-Smartphone-2404251.html?wt_mc=rss.ho.beitrag.atom', + 'only_matching': True, + }, { + 'url': 'http://www.heise.de/ct/ausgabe/2016-12-Spiele-3214137.html', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) container_id = self._search_regex( - r'<div class="videoplayerjw"[^>]*data-container="([0-9]+)"', + r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"', webpage, 'container ID') sequenz_id = self._search_regex( - r'<div class="videoplayerjw"[^>]*data-sequenz="([0-9]+)"', + r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"', webpage, 'sequenz ID') - data_url = 'http://www.heise.de/videout/feed?container=%s&sequenz=%s' % (container_id, sequenz_id) - doc = self._download_xml(data_url, video_id) - - info = { - 'id': video_id, - 'thumbnail': doc.find('.//{http://rss.jwpcdn.com/}image').text, - 'timestamp': parse_iso8601( - self._html_search_meta('date', webpage)) - } title = self._html_search_meta('fulltitle', webpage, default=None) if not title or title == "c't": title = self._search_regex( - r'<div class="videoplayerjw"[^>]*data-title="([^"]+)"', - webpage, 'video title') - info['title'] = title + r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"', + webpage, 'title') - desc = self._og_search_description(webpage, default=None) - if not desc: - desc = self._html_search_meta('description', webpage) - info['description'] = desc + doc = self._download_xml( + 'http://www.heise.de/videout/feed', video_id, query={ + 'container': container_id, + 'sequenz': sequenz_id, + }) formats = [] for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'): @@ -135,6 +73,18 @@ class HeiseIE(InfoExtractor): 'height': height, }) self._sort_formats(formats) - info['formats'] = formats - return info + description = self._og_search_description( + webpage, default=None) or self._html_search_meta( + 'description', webpage) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': (xpath_text(doc, './/{http://rss.jwpcdn.com/}image') or + self._og_search_thumbnail(webpage)), + 'timestamp': parse_iso8601( + self._html_search_meta('date', webpage)), + 'formats': formats, + } From 2c1f442c2bb4de65479f2e6c2f81c5741445184e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Feb 2017 23:18:26 +0700 Subject: [PATCH 0275/1696] 
[options] Add missing spaces --- youtube_dl/options.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 349f44778..2fea99ff2 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -298,14 +298,14 @@ def parseOpts(overrideArguments=None): metavar='FILTER', dest='match_filter', default=None, help=( 'Generic video filter. ' - 'Specify any key (see help for -o for a list of available keys) to' - ' match if the key is present, ' - '!key to check if the key is not present,' + 'Specify any key (see help for -o for a list of available keys) to ' + 'match if the key is present, ' + '!key to check if the key is not present, ' 'key > NUMBER (like "comment_count > 12", also works with ' '>=, <, <=, !=, =) to compare against a number, and ' '& to require multiple matches. ' - 'Values which are not known are excluded unless you' - ' put a question mark (?) after the operator.' + 'Values which are not known are excluded unless you ' + 'put a question mark (?) after the operator. ' 'For example, to only match videos that have been liked more than ' '100 times and disliked less than 50 times (or the dislike ' 'functionality is not available at the given service), but who ' From cf3704c132800809caacc6ce89afa87f0dfae487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Feb 2017 23:47:54 +0700 Subject: [PATCH 0276/1696] [ChangeLog] Actualize --- ChangeLog | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ChangeLog b/ChangeLog index d5fe3dd5b..00ee0a5a9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,13 @@ version <unreleased> Extractors +* [heise] Improve extraction (#9725) +* [ellentv] Improve (#11653) +* [openload] Fix extraction (#10408, #12002) + [theplatform] Recognize URLs with whitespaces (#12044) +* [einthusan] Relax URL regular expression (#12141, #12159) + [generic] Support complex JWPlayer embedded videos (#12030) +* [elpais] Improve extraction (#12139) version 2017.02.16 From 28e35f50702a8841b4caf072a546ff06ca63db96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Feb 2017 23:59:56 +0700 Subject: [PATCH 0277/1696] release 2017.02.17 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 4 ++-- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 06711f73b..6f1361b32 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.17** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.16 +[debug] youtube-dl version 2017.02.17 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 00ee0a5a9..2c90f791d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.17 Extractors * [heise] Improve extraction (#9725) diff --git a/README.md b/README.md index 89876bd7a..c2a1a6b02 100644 --- a/README.md +++ b/README.md @@ -137,13 +137,13 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo --match-filter FILTER Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to - check if the key is not present,key > + check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) - after the operator.For example, to only + after the operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 323e80954..530e1856b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.16' +__version__ = '2017.02.17' From 70bcc444a990ee9ca3daab6f3dc2d5d58a948ba4 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 18 Feb 2017 09:52:43 +0100 Subject: [PATCH 0278/1696] [viceland] improve info extraction and update test --- youtube_dl/extractor/vice.py | 6 +++--- youtube_dl/extractor/viceland.py | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 8a00c8fee..f0a7fd739 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -70,10 +70,10 @@ class ViceBaseIE(AdobePassIE): 'url': uplynk_preplay_url, 'id': video_id, 'title': title, - 'description': base.get('body'), + 'description': base.get('body') or base.get('display_body'), 'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'), - 'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')), - 'timestamp': int_or_none(video_data.get('created_at')), + 'duration': int_or_none(video_data.get('video_duration')) or parse_duration(watch_hub_data.get('video-duration')), + 'timestamp': int_or_none(video_data.get('created_at'), 1000), 'age_limit': parse_age_limit(video_data.get('video_rating')), 'series': video_data.get('show_title') or watch_hub_data.get('show-title'), 'episode_number': int_or_none(episode.get('episode_number') or 
watch_hub_data.get('episode')), diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py index 0eff055a6..87f9216b5 100644 --- a/youtube_dl/extractor/viceland.py +++ b/youtube_dl/extractor/viceland.py @@ -7,16 +7,16 @@ from .vice import ViceBaseIE class VicelandIE(ViceBaseIE): _VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P<id>[a-f0-9]+)' _TEST = { - 'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e', + 'url': 'https://www.viceland.com/en_us/video/trapped/588a70d0dba8a16007de7316', 'info_dict': { - 'id': '57608447973ee7705f6fbd4e', + 'id': '588a70d0dba8a16007de7316', 'ext': 'mp4', - 'title': 'CYBERWAR (Trailer)', - 'description': 'Tapping into the geopolitics of hacking and surveillance, Ben Makuch travels the world to meet with hackers, government officials, and dissidents to investigate the ecosystem of cyberwarfare.', + 'title': 'TRAPPED (Series Trailer)', + 'description': 'md5:7a8e95c2b6cd86461502a2845e581ccf', 'age_limit': 14, - 'timestamp': 1466008539, - 'upload_date': '20160615', - 'uploader_id': '11', + 'timestamp': 1485474122, + 'upload_date': '20170126', + 'uploader_id': '57a204098cb727dec794c6a3', 'uploader': 'Viceland', }, 'params': { From bdabbc220c60ea6be50c9b1058405b636f70fb71 Mon Sep 17 00:00:00 2001 From: Alex Monk <krenair@gmail.com> Date: Wed, 17 Aug 2016 21:13:28 +0100 Subject: [PATCH 0279/1696] [metacafe] Bypass family filter If you don't send this user=ffilter: false cookie, it will 301 redirect you to a page asking about it, and then the title check will fail. --- youtube_dl/extractor/metacafe.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 9880924e6..adbd44fd1 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -50,6 +50,18 @@ class MetacafeIE(InfoExtractor): }, 'skip': 'Page is temporarily unavailable.', }, + # metacafe video with family filter + { + 'url': 'http://www.metacafe.com/watch/2155630/adult_art_by_david_hart_156/', + 'md5': 'b06082c5079bbdcde677a6291fbdf376', + 'info_dict': { + 'id': '2155630', + 'ext': 'mp4', + 'title': 'Adult Art By David Hart #156', + 'uploader': 'hartistry', + 'description': 'Adult Art By David Hart. All the Art Works presented here are not in the possession of the American Artist, David John Hart. 
The paintings are in collections worldwide of individuals, countries, art museums, foundations and charities.', + } + }, # AnyClip video { 'url': 'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/', @@ -148,8 +160,9 @@ class MetacafeIE(InfoExtractor): # AnyClip videos require the flashversion cookie so that we get the link # to the mp4 file headers = {} + headers['Cookie'] = 'user=%7B%22ffilter%22%3Afalse%7D;'; if video_id.startswith('an-'): - headers['Cookie'] = 'flashVersion=0;' + headers['Cookie'] += ' flashVersion=0;' # Retrieve video webpage to extract further information webpage = self._download_webpage(url, video_id, headers=headers) From f75caf059eb7a1a156921124cbf4b720fea526e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Feb 2017 19:58:25 +0700 Subject: [PATCH 0280/1696] [metacafe] Improve (closes #10371) --- youtube_dl/extractor/metacafe.py | 38 +++++++++++--------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index adbd44fd1..28f59f63c 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -6,12 +6,12 @@ from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, ) from ..utils import ( determine_ext, ExtractorError, int_or_none, - urlencode_postdata, get_element_by_attribute, mimetype2ext, ) @@ -57,10 +57,13 @@ class MetacafeIE(InfoExtractor): 'info_dict': { 'id': '2155630', 'ext': 'mp4', - 'title': 'Adult Art By David Hart #156', - 'uploader': 'hartistry', - 'description': 'Adult Art By David Hart. All the Art Works presented here are not in the possession of the American Artist, David John Hart. 
The paintings are in collections worldwide of individuals, countries, art museums, foundations and charities.', - } + 'title': 'Adult Art By David Hart 156', + 'uploader': '63346', + 'description': 'md5:9afac8fc885252201ad14563694040fc', + }, + 'params': { + 'skip_download': True, + }, }, # AnyClip video { @@ -124,22 +127,6 @@ class MetacafeIE(InfoExtractor): def report_disclaimer(self): self.to_screen('Retrieving disclaimer') - def _confirm_age(self): - # Retrieve disclaimer - self.report_disclaimer() - self._download_webpage(self._DISCLAIMER, None, False, 'Unable to retrieve disclaimer') - - # Confirm age - self.report_age_confirmation() - self._download_webpage( - self._FILTER_POST, None, False, 'Unable to confirm age', - data=urlencode_postdata({ - 'filters': '0', - 'submit': "Continue - I'm over 18", - }), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - }) - def _real_extract(self, url): # Extract id and simplified title from URL video_id, display_id = re.match(self._VALID_URL, url).groups() @@ -155,14 +142,15 @@ class MetacafeIE(InfoExtractor): if prefix == 'cb': return self.url_result('theplatform:%s' % ext_id, 'ThePlatform') - # self._confirm_age() + headers = { + # Disable family filter + 'Cookie': 'user=%s; ' % compat_urllib_parse_urlencode({'ffilter': False}) + } # AnyClip videos require the flashversion cookie so that we get the link # to the mp4 file - headers = {} - headers['Cookie'] = 'user=%7B%22ffilter%22%3Afalse%7D;'; if video_id.startswith('an-'): - headers['Cookie'] += ' flashVersion=0;' + headers['Cookie'] += 'flashVersion=0; ' # Retrieve video webpage to extract further information webpage = self._download_webpage(url, video_id, headers=headers) From a2e3286676606103601f9499154ad465037314d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Feb 2017 20:21:37 +0700 Subject: [PATCH 0281/1696] [thisav] Add support for html5 media (closes #11771) --- youtube_dl/extractor/thisav.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py index b7b3568cb..33683b139 100644 --- a/youtube_dl/extractor/thisav.py +++ b/youtube_dl/extractor/thisav.py @@ -10,6 +10,7 @@ from ..utils import remove_end class ThisAVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*' _TESTS = [{ + # jwplayer 'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html', 'md5': '0480f1ef3932d901f0e0e719f188f19b', 'info_dict': { @@ -20,6 +21,7 @@ class ThisAVIE(InfoExtractor): 'uploader_id': 'dj7970' } }, { + # html5 media 'url': 'http://www.thisav.com/video/242352/nerdy-18yo-big-ass-tattoos-and-glasses.html', 'md5': 'ba90c076bd0f80203679e5b60bf523ee', 'info_dict': { @@ -48,8 +50,12 @@ class ThisAVIE(InfoExtractor): }], } else: - info_dict = self._extract_jwplayer_data( - webpage, video_id, require_title=False) + entries = self._parse_html5_media_entries(url, webpage, video_id) + if entries: + info_dict = entries[0] + else: + info_dict = self._extract_jwplayer_data( + webpage, video_id, require_title=False) uploader = self._html_search_regex( r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>', webpage, 'uploader name', fatal=False) From 02d9b82a233abcb778f3f8601b229f996fd7df94 Mon Sep 17 00:00:00 2001 From: Jakub Wilk <jwilk@jwilk.net> Date: Wed, 11 Jan 2017 18:49:40 +0100 Subject: [PATCH 0282/1696] [tvn24] Add extractor --- youtube_dl/extractor/extractors.py | 1 + 
youtube_dl/extractor/tvn24.py | 47 ++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 youtube_dl/extractor/tvn24.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index be3688d5a..55b4782d3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1009,6 +1009,7 @@ from .tvc import ( ) from .tvigle import TvigleIE from .tvland import TVLandIE +from .tvn24 import TVN24IE from .tvnoe import TVNoeIE from .tvp import ( TVPEmbedIE, diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py new file mode 100644 index 000000000..225ee4a6a --- /dev/null +++ b/youtube_dl/extractor/tvn24.py @@ -0,0 +1,47 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class TVN24IE(InfoExtractor): + _VALID_URL = r'http://(?:tvn24bis|(?:www|fakty)\.tvn24)\.pl/.+/(?P<id>[^/]+)\.html' + _TEST = { + 'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html', + 'md5': 'fbdec753d7bc29d96036808275f2130c', + 'info_dict': { + 'id': '1584444', + 'ext': 'mp4', + 'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"', + 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".', + 'thumbnail': 're:http://.*[.]jpeg', + } + } + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + thumbnail = self._html_search_regex(r'\bdata-poster="(.+?)"', webpage, 'data-poster') + share_params = self._html_search_regex(r'\bdata-share-params="(.+?)"', webpage, 'data-share-params') + share_params = self._parse_json(share_params, page_id) + video_id = share_params['id'] + quality_data = self._html_search_regex(r'\bdata-quality="(.+?)"', webpage, 'data-quality') + quality_data = self._parse_json(quality_data, page_id) + formats = [] + for format_id, url in quality_data.items(): + formats.append({ + 'format_id': format_id, + 'height': int(format_id.rstrip('p')), + 'url': url, + 'ext': 'mp4', + }) + self._sort_formats(formats) + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'formats': formats, + } From e84888b4322abd2e2a74e8a89b7942a68dd0b6a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Feb 2017 23:34:09 +0700 Subject: [PATCH 0283/1696] [tvn24] Improve extraction (closes #11679) --- youtube_dl/extractor/tvn24.py | 59 ++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py index 225ee4a6a..12ed6039c 100644 --- a/youtube_dl/extractor/tvn24.py +++ b/youtube_dl/extractor/tvn24.py @@ -2,11 +2,15 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ( + int_or_none, + unescapeHTML, +) class TVN24IE(InfoExtractor): - _VALID_URL = r'http://(?:tvn24bis|(?:www|fakty)\.tvn24)\.pl/.+/(?P<id>[^/]+)\.html' - _TEST = { + _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)\.html' + _TESTS = [{ 'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html', 'md5': 'fbdec753d7bc29d96036808275f2130c', 'info_dict': { @@ -16,28 +20,53 @@ class TVN24IE(InfoExtractor): 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".', 
'thumbnail': 're:http://.*[.]jpeg', } - } + }, { + 'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html', + 'only_matching': True, + }, { + 'url': 'http://sport.tvn24.pl/pilka-nozna,105/ligue-1-kamil-glik-rozcial-glowe-monaco-tylko-remisuje-z-bastia,716522.html', + 'only_matching': True, + }, { + 'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html', + 'only_matching': True, + }] def _real_extract(self, url): - page_id = self._match_id(url) - webpage = self._download_webpage(url, page_id) + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) - description = self._og_search_description(webpage) - thumbnail = self._html_search_regex(r'\bdata-poster="(.+?)"', webpage, 'data-poster') - share_params = self._html_search_regex(r'\bdata-share-params="(.+?)"', webpage, 'data-share-params') - share_params = self._parse_json(share_params, page_id) - video_id = share_params['id'] - quality_data = self._html_search_regex(r'\bdata-quality="(.+?)"', webpage, 'data-quality') - quality_data = self._parse_json(quality_data, page_id) + + def extract_json(attr, name, fatal=True): + return self._parse_json( + self._search_regex( + r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage, + name, group='json', fatal=fatal) or '{}', + video_id, transform_source=unescapeHTML, fatal=fatal) + + quality_data = extract_json('data-quality', 'formats') + formats = [] for format_id, url in quality_data.items(): formats.append({ - 'format_id': format_id, - 'height': int(format_id.rstrip('p')), 'url': url, - 'ext': 'mp4', + 'format_id': format_id, + 'height': int_or_none(format_id.rstrip('p')), }) self._sort_formats(formats) + + description = self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail( + webpage, default=None) or self._html_search_regex( + r'\bdata-poster=(["\'])(?P<url>(?!\1).+?)\1', webpage, + 'thumbnail', group='url') + + share_params = extract_json( + 'data-share-params', 'share params', fatal=False) + if isinstance(share_params, dict): + video_id = share_params.get('id') or video_id + return { 'id': video_id, 'title': title, From ac33accd96279ee541952aaa4f0bb72b4f76b9ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Feb 2017 23:59:26 +0700 Subject: [PATCH 0284/1696] [options] Mention quoted string literals for --match-filter --- youtube_dl/options.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 2fea99ff2..deff54324 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -302,8 +302,10 @@ def parseOpts(overrideArguments=None): 'match if the key is present, ' '!key to check if the key is not present, ' 'key > NUMBER (like "comment_count > 12", also works with ' - '>=, <, <=, !=, =) to compare against a number, and ' - '& to require multiple matches. ' + '>=, <, <=, !=, =) to compare against a number, ' + 'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) ' + 'to match against a string literal ' + 'and & to require multiple matches. ' 'Values which are not known are excluded unless you ' 'put a question mark (?) after the operator. 
' 'For example, to only match videos that have been liked more than ' From 049a0f4d6da55f4062658da7593363147c92f4a8 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 18 Feb 2017 21:07:09 +0100 Subject: [PATCH 0285/1696] [brightcove:legacy] restrict videoPlayer value(closes #12040) --- youtube_dl/extractor/brightcove.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 5c6e99da1..27685eed0 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -191,6 +191,10 @@ class BrightcoveLegacyIE(InfoExtractor): # These fields hold the id of the video videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList') if videoPlayer is not None: + if isinstance(videoPlayer, list): + videoPlayer = videoPlayer[0] + if not (videoPlayer.isdigit() or videoPlayer.startswith('ref:')): + return None params['@videoPlayer'] = videoPlayer linkBase = find_param('linkBaseURL') if linkBase is not None: From bf5b9d859a1f2a68fda0dc57eb839448c7571dfa Mon Sep 17 00:00:00 2001 From: Pierre Mdawar <p.mdawar@gmail.com> Date: Mon, 17 Oct 2016 14:38:37 +0300 Subject: [PATCH 0286/1696] [utils] Introduce YoutubeDLError base class for all youtube-dl exceptions --- youtube_dl/utils.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 07c07be6f..3f9e592e3 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -701,7 +701,12 @@ def bug_reports_message(): return msg -class ExtractorError(Exception): +class YoutubeDLError(Exception): + """Base exception for YoutubeDL errors.""" + pass + + +class ExtractorError(YoutubeDLError): """Error during info extraction.""" def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None): @@ -742,7 +747,7 @@ class RegexNotFoundError(ExtractorError): pass -class DownloadError(Exception): +class DownloadError(YoutubeDLError): """Download Error exception. This exception may be thrown by FileDownloader objects if they are not @@ -756,7 +761,7 @@ class DownloadError(Exception): self.exc_info = exc_info -class SameFileError(Exception): +class SameFileError(YoutubeDLError): """Same File exception. This exception will be thrown by FileDownloader objects if they detect @@ -765,7 +770,7 @@ class SameFileError(Exception): pass -class PostProcessingError(Exception): +class PostProcessingError(YoutubeDLError): """Post Processing exception. This exception may be raised by PostProcessor's .run() method to @@ -773,15 +778,16 @@ class PostProcessingError(Exception): """ def __init__(self, msg): + super(PostProcessingError, self).__init__(msg) self.msg = msg -class MaxDownloadsReached(Exception): +class MaxDownloadsReached(YoutubeDLError): """ --max-downloads limit has been reached. """ pass -class UnavailableVideoError(Exception): +class UnavailableVideoError(YoutubeDLError): """Unavailable Format exception. This exception will be thrown when a video is requested @@ -790,7 +796,7 @@ class UnavailableVideoError(Exception): pass -class ContentTooShortError(Exception): +class ContentTooShortError(YoutubeDLError): """Content Too Short exception. 
This exception may be raised by FileDownloader objects when a file they @@ -799,12 +805,15 @@ class ContentTooShortError(Exception): """ def __init__(self, downloaded, expected): + super(ContentTooShortError, self).__init__( + 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected) + ) # Both in bytes self.downloaded = downloaded self.expected = expected -class XAttrMetadataError(Exception): +class XAttrMetadataError(YoutubeDLError): def __init__(self, code=None, msg='Unknown error'): super(XAttrMetadataError, self).__init__(msg) self.code = code @@ -820,7 +829,7 @@ class XAttrMetadataError(Exception): self.reason = 'NOT_SUPPORTED' -class XAttrUnavailableError(Exception): +class XAttrUnavailableError(YoutubeDLError): pass From 773f291dcbce486fefe24e1abd29735d374d0a9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:49:58 +0700 Subject: [PATCH 0287/1696] Add experimental geo restriction bypass mechanism Based on faking X-Forwarded-For HTTP header --- youtube_dl/YoutubeDL.py | 17 +++ youtube_dl/__init__.py | 2 + youtube_dl/extractor/common.py | 48 +++++- youtube_dl/options.py | 12 ++ youtube_dl/utils.py | 267 +++++++++++++++++++++++++++++++++ 5 files changed, 340 insertions(+), 6 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a7bf5a1b0..ebace6b57 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -56,6 +56,8 @@ from .utils import ( ExtractorError, format_bytes, formatSeconds, + GeoRestrictedError, + ISO3166Utils, locked_file, make_HTTPS_handler, MaxDownloadsReached, @@ -272,6 +274,13 @@ class YoutubeDL(object): If it returns None, the video is downloaded. match_filter_func in utils.py is one example for this. no_color: Do not emit color codes in output. + bypass_geo_restriction: + Bypass geographic restriction via faking X-Forwarded-For + HTTP header (experimental) + bypass_geo_restriction_as_country: + Two-letter ISO 3166-2 country code that will be used for + explicit geographic restriction bypassing via faking + X-Forwarded-For HTTP header (experimental) The following options determine which downloader is picked: external_downloader: Executable of the external downloader to call. @@ -707,6 +716,14 @@ class YoutubeDL(object): return self.process_ie_result(ie_result, download, extra_info) else: return ie_result + except GeoRestrictedError as e: + msg = e.msg + if e.countries: + msg += '\nThis video is available in %s.' % ', '.join( + map(ISO3166Utils.short2full, e.countries)) + msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' 
+ self.report_error(msg) + break except ExtractorError as e: # An error we somewhat expected self.report_error(compat_str(e), e.format_traceback()) break diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 5c5b8094b..94f461a78 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -414,6 +414,8 @@ def _real_main(argv=None): 'cn_verification_proxy': opts.cn_verification_proxy, 'geo_verification_proxy': opts.geo_verification_proxy, 'config_location': opts.config_location, + 'bypass_geo_restriction': opts.bypass_geo_restriction, + 'bypass_geo_restriction_as_country': opts.bypass_geo_restriction_as_country, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f6ff56eda..96815099d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -6,6 +6,7 @@ import hashlib import json import netrc import os +import random import re import socket import sys @@ -39,6 +40,8 @@ from ..utils import ( ExtractorError, fix_xml_ampersands, float_or_none, + GeoRestrictedError, + GeoUtils, int_or_none, js_to_json, parse_iso8601, @@ -320,17 +323,25 @@ class InfoExtractor(object): _real_extract() methods and define a _VALID_URL regexp. Probably, they should also be added to the list of extractors. + _BYPASS_GEO attribute may be set to False in order to disable + geo restriction bypass mechanisms for a particular extractor. + Though it won't disable explicit geo restriction bypass based on + country code provided with bypass_geo_restriction_as_country. + Finally, the _WORKING attribute should be set to False for broken IEs in order to warn the users and skip the tests. """ _ready = False _downloader = None + _x_forwarded_for_ip = None + _BYPASS_GEO = True _WORKING = True def __init__(self, downloader=None): """Constructor. Receives an optional downloader.""" self._ready = False + self._x_forwarded_for_ip = None self.set_downloader(downloader) @classmethod @@ -359,6 +370,10 @@ class InfoExtractor(object): def initialize(self): """Initializes an instance (authentication, etc).""" + if not self._x_forwarded_for_ip: + country_code = self._downloader.params.get('bypass_geo_restriction_as_country', None) + if country_code: + self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) if not self._ready: self._real_initialize() self._ready = True @@ -366,8 +381,22 @@ class InfoExtractor(object): def extract(self, url): """Extracts URL information and returns it in list of dicts.""" try: - self.initialize() - return self._real_extract(url) + for _ in range(2): + try: + self.initialize() + return self._real_extract(url) + except GeoRestrictedError as e: + if (not self._downloader.params.get('bypass_geo_restriction_as_country', None) and + self._BYPASS_GEO and + self._downloader.params.get('bypass_geo_restriction', True) and + not self._x_forwarded_for_ip and + e.countries): + self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries)) + if self._x_forwarded_for_ip: + self.report_warning( + 'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) + continue + raise except ExtractorError: raise except compat_http_client.IncompleteRead as e: @@ -434,6 +463,15 @@ class InfoExtractor(object): if isinstance(url_or_request, (compat_str, str)): url_or_request = url_or_request.partition('#')[0] + # Some sites check X-Forwarded-For HTTP header in order to figure out + # the origin of the client behind proxy. 
This allows bypassing geo + # restriction by faking this header's value to IP that belongs to some + # geo unrestricted country. We will do so once we encounter any + # geo restriction error. + if self._x_forwarded_for_ip: + if 'X-Forwarded-For' not in headers: + headers['X-Forwarded-For'] = self._x_forwarded_for_ip + urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query) if urlh is False: assert not fatal @@ -609,10 +647,8 @@ class InfoExtractor(object): expected=True) @staticmethod - def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'): - raise ExtractorError( - '%s. You might want to use --proxy to workaround.' % msg, - expected=True) + def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None): + raise GeoRestrictedError(msg, countries=countries) # Methods for following #608 @staticmethod diff --git a/youtube_dl/options.py b/youtube_dl/options.py index deff54324..2e194f6dc 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -549,6 +549,18 @@ def parseOpts(overrideArguments=None): 'Upper bound of a range for randomized sleep before each download ' '(maximum possible number of seconds to sleep). Must only be used ' 'along with --min-sleep-interval.')) + workarounds.add_option( + '--bypass-geo', + action='store_true', dest='bypass_geo_restriction', default=True, + help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') + workarounds.add_option( + '--no-bypass-geo', + action='store_false', dest='bypass_geo_restriction', default=True, + help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') + workarounds.add_option( + '--bypass-geo-as-country', metavar='CODE', + dest='bypass_geo_restriction_as_country', default=None, + help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') verbosity.add_option( diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 3f9e592e3..4e76b6b7b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -23,6 +23,7 @@ import operator import os import pipes import platform +import random import re import socket import ssl @@ -747,6 +748,18 @@ class RegexNotFoundError(ExtractorError): pass +class GeoRestrictedError(ExtractorError): + """Geographic restriction Error exception. + + This exception may be thrown when a video is not available from your + geographic location due to geographic restrictions imposed by a website. + """ + def __init__(self, msg, countries=None): + super(GeoRestrictedError, self).__init__(msg, expected=True) + self.msg = msg + self.countries = countries + + class DownloadError(YoutubeDLError): """Download Error exception. 
@@ -3027,6 +3040,260 @@ class ISO3166Utils(object): return cls._country_map.get(code.upper()) +class GeoUtils(object): + # Major IPv4 address blocks per country + _country_ip_map = { + 'AD': '85.94.160.0/19', + 'AE': '94.200.0.0/13', + 'AF': '149.54.0.0/17', + 'AG': '209.59.64.0/18', + 'AI': '204.14.248.0/21', + 'AL': '46.99.0.0/16', + 'AM': '46.70.0.0/15', + 'AO': '105.168.0.0/13', + 'AP': '159.117.192.0/21', + 'AR': '181.0.0.0/12', + 'AS': '202.70.112.0/20', + 'AT': '84.112.0.0/13', + 'AU': '1.128.0.0/11', + 'AW': '181.41.0.0/18', + 'AZ': '5.191.0.0/16', + 'BA': '31.176.128.0/17', + 'BB': '65.48.128.0/17', + 'BD': '114.130.0.0/16', + 'BE': '57.0.0.0/8', + 'BF': '129.45.128.0/17', + 'BG': '95.42.0.0/15', + 'BH': '37.131.0.0/17', + 'BI': '154.117.192.0/18', + 'BJ': '137.255.0.0/16', + 'BL': '192.131.134.0/24', + 'BM': '196.12.64.0/18', + 'BN': '156.31.0.0/16', + 'BO': '161.56.0.0/16', + 'BQ': '161.0.80.0/20', + 'BR': '152.240.0.0/12', + 'BS': '24.51.64.0/18', + 'BT': '119.2.96.0/19', + 'BW': '168.167.0.0/16', + 'BY': '178.120.0.0/13', + 'BZ': '179.42.192.0/18', + 'CA': '99.224.0.0/11', + 'CD': '41.243.0.0/16', + 'CF': '196.32.200.0/21', + 'CG': '197.214.128.0/17', + 'CH': '85.0.0.0/13', + 'CI': '154.232.0.0/14', + 'CK': '202.65.32.0/19', + 'CL': '152.172.0.0/14', + 'CM': '165.210.0.0/15', + 'CN': '36.128.0.0/10', + 'CO': '181.240.0.0/12', + 'CR': '201.192.0.0/12', + 'CU': '152.206.0.0/15', + 'CV': '165.90.96.0/19', + 'CW': '190.88.128.0/17', + 'CY': '46.198.0.0/15', + 'CZ': '88.100.0.0/14', + 'DE': '53.0.0.0/8', + 'DJ': '197.241.0.0/17', + 'DK': '87.48.0.0/12', + 'DM': '192.243.48.0/20', + 'DO': '152.166.0.0/15', + 'DZ': '41.96.0.0/12', + 'EC': '186.68.0.0/15', + 'EE': '90.190.0.0/15', + 'EG': '156.160.0.0/11', + 'ER': '196.200.96.0/20', + 'ES': '88.0.0.0/11', + 'ET': '196.188.0.0/14', + 'EU': '2.16.0.0/13', + 'FI': '91.152.0.0/13', + 'FJ': '144.120.0.0/16', + 'FM': '119.252.112.0/20', + 'FO': '88.85.32.0/19', + 'FR': '90.0.0.0/9', + 'GA': '41.158.0.0/15', + 'GB': '25.0.0.0/8', + 'GD': '74.122.88.0/21', + 'GE': '31.146.0.0/16', + 'GF': '161.22.64.0/18', + 'GG': '62.68.160.0/19', + 'GH': '45.208.0.0/14', + 'GI': '85.115.128.0/19', + 'GL': '88.83.0.0/19', + 'GM': '160.182.0.0/15', + 'GN': '197.149.192.0/18', + 'GP': '104.250.0.0/19', + 'GQ': '105.235.224.0/20', + 'GR': '94.64.0.0/13', + 'GT': '168.234.0.0/16', + 'GU': '168.123.0.0/16', + 'GW': '197.214.80.0/20', + 'GY': '181.41.64.0/18', + 'HK': '113.252.0.0/14', + 'HN': '181.210.0.0/16', + 'HR': '93.136.0.0/13', + 'HT': '148.102.128.0/17', + 'HU': '84.0.0.0/14', + 'ID': '39.192.0.0/10', + 'IE': '87.32.0.0/12', + 'IL': '79.176.0.0/13', + 'IM': '5.62.80.0/20', + 'IN': '117.192.0.0/10', + 'IO': '203.83.48.0/21', + 'IQ': '37.236.0.0/14', + 'IR': '2.176.0.0/12', + 'IS': '82.221.0.0/16', + 'IT': '79.0.0.0/10', + 'JE': '87.244.64.0/18', + 'JM': '72.27.0.0/17', + 'JO': '176.29.0.0/16', + 'JP': '126.0.0.0/8', + 'KE': '105.48.0.0/12', + 'KG': '158.181.128.0/17', + 'KH': '36.37.128.0/17', + 'KI': '103.25.140.0/22', + 'KM': '197.255.224.0/20', + 'KN': '198.32.32.0/19', + 'KP': '175.45.176.0/22', + 'KR': '175.192.0.0/10', + 'KW': '37.36.0.0/14', + 'KY': '64.96.0.0/15', + 'KZ': '2.72.0.0/13', + 'LA': '115.84.64.0/18', + 'LB': '178.135.0.0/16', + 'LC': '192.147.231.0/24', + 'LI': '82.117.0.0/19', + 'LK': '112.134.0.0/15', + 'LR': '41.86.0.0/19', + 'LS': '129.232.0.0/17', + 'LT': '78.56.0.0/13', + 'LU': '188.42.0.0/16', + 'LV': '46.109.0.0/16', + 'LY': '41.252.0.0/14', + 'MA': '105.128.0.0/11', + 'MC': '88.209.64.0/18', + 'MD': '37.246.0.0/16', + 'ME': 
'178.175.0.0/17', + 'MF': '74.112.232.0/21', + 'MG': '154.126.0.0/17', + 'MH': '117.103.88.0/21', + 'MK': '77.28.0.0/15', + 'ML': '154.118.128.0/18', + 'MM': '37.111.0.0/17', + 'MN': '49.0.128.0/17', + 'MO': '60.246.0.0/16', + 'MP': '202.88.64.0/20', + 'MQ': '109.203.224.0/19', + 'MR': '41.188.64.0/18', + 'MS': '208.90.112.0/22', + 'MT': '46.11.0.0/16', + 'MU': '105.16.0.0/12', + 'MV': '27.114.128.0/18', + 'MW': '105.234.0.0/16', + 'MX': '187.192.0.0/11', + 'MY': '175.136.0.0/13', + 'MZ': '197.218.0.0/15', + 'NA': '41.182.0.0/16', + 'NC': '101.101.0.0/18', + 'NE': '197.214.0.0/18', + 'NF': '203.17.240.0/22', + 'NG': '105.112.0.0/12', + 'NI': '186.76.0.0/15', + 'NL': '145.96.0.0/11', + 'NO': '84.208.0.0/13', + 'NP': '36.252.0.0/15', + 'NR': '203.98.224.0/19', + 'NU': '49.156.48.0/22', + 'NZ': '49.224.0.0/14', + 'OM': '5.36.0.0/15', + 'PA': '186.72.0.0/15', + 'PE': '186.160.0.0/14', + 'PF': '123.50.64.0/18', + 'PG': '124.240.192.0/19', + 'PH': '49.144.0.0/13', + 'PK': '39.32.0.0/11', + 'PL': '83.0.0.0/11', + 'PM': '70.36.0.0/20', + 'PR': '66.50.0.0/16', + 'PS': '188.161.0.0/16', + 'PT': '85.240.0.0/13', + 'PW': '202.124.224.0/20', + 'PY': '181.120.0.0/14', + 'QA': '37.210.0.0/15', + 'RE': '139.26.0.0/16', + 'RO': '79.112.0.0/13', + 'RS': '178.220.0.0/14', + 'RU': '5.136.0.0/13', + 'RW': '105.178.0.0/15', + 'SA': '188.48.0.0/13', + 'SB': '202.1.160.0/19', + 'SC': '154.192.0.0/11', + 'SD': '154.96.0.0/13', + 'SE': '78.64.0.0/12', + 'SG': '152.56.0.0/14', + 'SI': '188.196.0.0/14', + 'SK': '78.98.0.0/15', + 'SL': '197.215.0.0/17', + 'SM': '89.186.32.0/19', + 'SN': '41.82.0.0/15', + 'SO': '197.220.64.0/19', + 'SR': '186.179.128.0/17', + 'SS': '105.235.208.0/21', + 'ST': '197.159.160.0/19', + 'SV': '168.243.0.0/16', + 'SX': '190.102.0.0/20', + 'SY': '5.0.0.0/16', + 'SZ': '41.84.224.0/19', + 'TC': '65.255.48.0/20', + 'TD': '154.68.128.0/19', + 'TG': '196.168.0.0/14', + 'TH': '171.96.0.0/13', + 'TJ': '85.9.128.0/18', + 'TK': '27.96.24.0/21', + 'TL': '180.189.160.0/20', + 'TM': '95.85.96.0/19', + 'TN': '197.0.0.0/11', + 'TO': '175.176.144.0/21', + 'TR': '78.160.0.0/11', + 'TT': '186.44.0.0/15', + 'TV': '202.2.96.0/19', + 'TW': '120.96.0.0/11', + 'TZ': '156.156.0.0/14', + 'UA': '93.72.0.0/13', + 'UG': '154.224.0.0/13', + 'US': '3.0.0.0/8', + 'UY': '167.56.0.0/13', + 'UZ': '82.215.64.0/18', + 'VA': '212.77.0.0/19', + 'VC': '24.92.144.0/20', + 'VE': '186.88.0.0/13', + 'VG': '172.103.64.0/18', + 'VI': '146.226.0.0/16', + 'VN': '14.160.0.0/11', + 'VU': '202.80.32.0/20', + 'WF': '117.20.32.0/21', + 'WS': '202.4.32.0/19', + 'YE': '134.35.0.0/16', + 'YT': '41.242.116.0/22', + 'ZA': '41.0.0.0/11', + 'ZM': '165.56.0.0/13', + 'ZW': '41.85.192.0/19', + } + + @classmethod + def random_ipv4(cls, code): + block = cls._country_ip_map.get(code.upper()) + if not block: + return None + addr, preflen = block.split('/') + addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0] + addr_max = addr_min | (0xffffffff >> int(preflen)) + return socket.inet_ntoa( + compat_struct_pack('!I', random.randint(addr_min, addr_max))) + + class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): def __init__(self, proxies=None): # Set default handlers From d392005a795a6cf85fda3c0f982254f8a2731e94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:51:16 +0700 Subject: [PATCH 0288/1696] [dramafever] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/dramafever.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git 
a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index bcd9fe2a0..755db806a 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -116,8 +116,9 @@ class DramaFeverIE(DramaFeverBaseIE): 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError): - raise ExtractorError( - 'Currently unavailable in your country.', expected=True) + self.raise_geo_restricted( + msg='Currently unavailable in your country', + countries=['US', 'CA']) raise series_id, episode_number = video_id.split('.') From e633f21a96f37a96e8ef0fd4d6c1e4d3c0b41fbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:51:33 +0700 Subject: [PATCH 0289/1696] [go] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/go.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index f28e6fbf5..ec902c670 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -101,6 +101,10 @@ class GoIE(AdobePassIE): video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers()) errors = entitlement.get('errors', {}).get('errors', []) if errors: + for error in errors: + if error.get('code') == 1002: + self.raise_geo_restricted( + error['message'], countries=['US']) error_message = ', '.join([error['message'] for error in errors]) raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) asset_url += '?' + entitlement['uplynkData']['sessionKey'] From 28200e654b8051cadca12e51bd57f77e1ff0a4ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:51:51 +0700 Subject: [PATCH 0290/1696] [itv] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/itv.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index b0d860452..aabde15f3 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -98,7 +98,10 @@ class ITVIE(InfoExtractor): headers=headers, data=etree.tostring(req_env)) playlist = xpath_element(resp_env, './/Playlist') if playlist is None: + fault_code = xpath_text(resp_env, './/faultcode') fault_string = xpath_text(resp_env, './/faultstring') + if fault_code == 'InvalidGeoRegion': + self.raise_geo_restricted(msg=fault_string, countries=['GB']) raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string)) title = xpath_text(playlist, 'EpisodeTitle', fatal=True) video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) From ff4007891fde74212eb0898bb04c14b2de92ed03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:51:59 +0700 Subject: [PATCH 0291/1696] [nrk] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/nrk.py | 36 +++++------------------------------- 1 file changed, 5 insertions(+), 31 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index fc3c0cd3c..78ece33e1 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import random import re from .common import InfoExtractor @@ -15,25 +14,6 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): - _faked_ip = None - - def _download_webpage_handle(self, 
*args, **kwargs): - # NRK checks X-Forwarded-For HTTP header in order to figure out the - # origin of the client behind proxy. This allows to bypass geo - # restriction by faking this header's value to some Norway IP. - # We will do so once we encounter any geo restriction error. - if self._faked_ip: - # NB: str is intentional - kwargs.setdefault(str('headers'), {})['X-Forwarded-For'] = self._faked_ip - return super(NRKBaseIE, self)._download_webpage_handle(*args, **kwargs) - - def _fake_ip(self): - # Use fake IP from 37.191.128.0/17 in order to workaround geo - # restriction - def octet(lb=0, ub=255): - return random.randint(lb, ub) - self._faked_ip = '37.191.%d.%d' % (octet(128), octet()) - def _real_extract(self, url): video_id = self._match_id(url) @@ -44,8 +24,6 @@ class NRKBaseIE(InfoExtractor): title = data.get('fullTitle') or data.get('mainTitle') or data['title'] video_id = data.get('id') or video_id - http_headers = {'X-Forwarded-For': self._faked_ip} if self._faked_ip else {} - entries = [] conviva = data.get('convivaStatistics') or {} @@ -90,7 +68,6 @@ class NRKBaseIE(InfoExtractor): 'duration': duration, 'subtitles': subtitles, 'formats': formats, - 'http_headers': http_headers, }) if not entries: @@ -107,19 +84,16 @@ class NRKBaseIE(InfoExtractor): }] if not entries: - message_type = data.get('messageType', '') - # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked* - if 'IsGeoBlocked' in message_type and not self._faked_ip: - self.report_warning( - 'Video is geo restricted, trying to fake IP') - self._fake_ip() - return self._real_extract(url) - MESSAGES = { 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet', 'ProgramRightsHasExpired': 'Programmet har gått ut', 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', } + message_type = data.get('messageType', '') + # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked* + if 'IsGeoBlocked' in message_type: + self.raise_geo_restricted( + msg=MESSAGES.get('ProgramIsGeoBlocked'), countries=['NO']) raise ExtractorError( '%s said: %s' % (self.IE_NAME, MESSAGES.get( message_type, message_type)), From 01b1aa9ff408ce15b8bbea08dbc190f3282141a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:52:11 +0700 Subject: [PATCH 0292/1696] [ondemandkorea] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/ondemandkorea.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py index dcd157777..0c85d549e 100644 --- a/youtube_dl/extractor/ondemandkorea.py +++ b/youtube_dl/extractor/ondemandkorea.py @@ -35,7 +35,8 @@ class OnDemandKoreaIE(InfoExtractor): if 'msg_block_01.png' in webpage: self.raise_geo_restricted( - 'This content is not available in your region') + msg='This content is not available in your region', + countries=['US', 'CA']) if 'This video is only available to ODK PLUS members.' 
in webpage: raise ExtractorError( From 8ab8066cf08352ad336c3ff594d0ac27f6c809c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:52:20 +0700 Subject: [PATCH 0293/1696] [pbs] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/pbs.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 6baed773f..64f47bae3 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -489,11 +489,12 @@ class PBSIE(InfoExtractor): headers=self.geo_verification_headers()) if redirect_info['status'] == 'error': + message = self._ERRORS.get( + redirect_info['http_code'], redirect_info['message']) + if redirect_info['http_code'] == 403: + self.raise_geo_restricted(msg=message, countries=['US']) raise ExtractorError( - '%s said: %s' % ( - self.IE_NAME, - self._ERRORS.get(redirect_info['http_code'], redirect_info['message'])), - expected=True) + '%s said: %s' % (self.IE_NAME, message), expected=True) format_url = redirect_info.get('url') if not format_url: From 04d906eae3071e37049cfcd2a02e9079b72a265c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:52:33 +0700 Subject: [PATCH 0294/1696] [svt] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/svt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 10cf80885..f2a2200bf 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -38,7 +38,8 @@ class SVTBaseIE(InfoExtractor): 'url': vurl, }) if not formats and video_info.get('rights', {}).get('geoBlockedSweden'): - self.raise_geo_restricted('This video is only available in Sweden') + self.raise_geo_restricted( + 'This video is only available in Sweden', countries=['SE']) self._sort_formats(formats) subtitles = {} From 89cc7fe7705b6534f434b514265a0507b70ef40f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:52:42 +0700 Subject: [PATCH 0295/1696] [vbox7] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/vbox7.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index bef639462..f86d804c1 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -78,7 +78,7 @@ class Vbox7IE(InfoExtractor): video_url = video['src'] if '/na.mp4' in video_url: - self.raise_geo_restricted() + self.raise_geo_restricted(countries=['BG']) uploader = video.get('uploader') From 71631862f4de5a10223642ebdbd5e10db374d270 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:55:23 +0700 Subject: [PATCH 0296/1696] [srgssr] Improve geo restriction detection --- youtube_dl/extractor/srgssr.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index 319a48a7a..a35a0a538 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ -14,6 +14,7 @@ from ..utils import ( class SRGSSRIE(InfoExtractor): _VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)' + _BYPASS_GEO = False _ERRORS = { 'AGERATING12': 'To protect 
children under the age of 12, this video is only available between 8 p.m. and 6 a.m.', @@ -40,8 +41,11 @@ class SRGSSRIE(InfoExtractor): media_id)[media_type.capitalize()] if media_data.get('block') and media_data['block'] in self._ERRORS: - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, self._ERRORS[media_data['block']]), expected=True) + message = self._ERRORS[media_data['block']] + if media_data['block'] == 'GEOBLOCK': + self.raise_geo_restricted(msg=message, countries=['CH']) + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, message), expected=True) return media_data From 80b59020e02e9c61f74f8f8f8891f9745667edb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:55:31 +0700 Subject: [PATCH 0297/1696] [vgtv] Improve geo restriction detection --- youtube_dl/extractor/vgtv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 8a574bc26..1709fd6bb 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -14,6 +14,7 @@ from ..utils import ( class VGTVIE(XstreamIE): IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet' + _BYPASS_GEO = False _HOST_TO_APPNAME = { 'vgtv.no': 'vgtv', @@ -217,7 +218,7 @@ class VGTVIE(XstreamIE): properties = try_get( data, lambda x: x['streamConfiguration']['properties'], list) if properties and 'geoblocked' in properties: - raise self.raise_geo_restricted() + raise self.raise_geo_restricted(countries=['NO']) self._sort_formats(info['formats']) From 5d3fbf77d96ade64c645b6942979c0b99aa4d775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 18:55:39 +0700 Subject: [PATCH 0298/1696] [viki] Improve geo restriction detection --- youtube_dl/extractor/viki.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 9c48701c1..68a74e246 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -27,6 +27,7 @@ class VikiBaseIE(InfoExtractor): _APP_VERSION = '2.2.5.1428709186' _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)' + _BYPASS_GEO = False _NETRC_MACHINE = 'viki' _token = None @@ -77,8 +78,11 @@ class VikiBaseIE(InfoExtractor): def _check_errors(self, data): for reason, status in data.get('blocking', {}).items(): if status and reason in self._ERRORS: + message = self._ERRORS[reason] + if reason == 'geo': + self.raise_geo_restricted(msg=message) raise ExtractorError('%s said: %s' % ( - self.IE_NAME, self._ERRORS[reason]), expected=True) + self.IE_NAME, message), expected=True) def _real_initialize(self): self._login() From 18a0defab063523cd76a30be2dd5a80e9f9172d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 20:26:43 +0700 Subject: [PATCH 0299/1696] [utils] Make random_ipv4 return unicode string --- youtube_dl/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 4e76b6b7b..cbf7639c5 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3290,8 +3290,8 @@ class GeoUtils(object): addr, preflen = block.split('/') addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0] addr_max = addr_min | (0xffffffff >> int(preflen)) - return socket.inet_ntoa( - compat_struct_pack('!I', random.randint(addr_min, addr_max))) + return compat_str(socket.inet_ntoa( + 
compat_struct_pack('!I', random.randint(addr_min, addr_max)))) class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): From 0016b84e16965a07c52946c4672363153e8b18a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Feb 2017 21:06:07 +0700 Subject: [PATCH 0300/1696] Add faked X-Forwarded-For to formats' HTTP headers --- youtube_dl/YoutubeDL.py | 14 ++++++++++++++ youtube_dl/extractor/common.py | 5 ++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index ebace6b57..1c04e46c1 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -864,8 +864,14 @@ class YoutubeDL(object): if self.params.get('playlistrandom', False): random.shuffle(entries) + x_forwarded_for = ie_result.get('__x_forwarded_for_ip') + for i, entry in enumerate(entries, 1): self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) + # This __x_forwarded_for_ip thing is a bit ugly but requires + # minimal changes + if x_forwarded_for: + entry['__x_forwarded_for_ip'] = x_forwarded_for extra = { 'n_entries': n_entries, 'playlist': playlist, @@ -1250,6 +1256,11 @@ class YoutubeDL(object): if cookies: res['Cookie'] = cookies + if 'X-Forwarded-For' not in res: + x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip') + if x_forwarded_for_ip: + res['X-Forwarded-For'] = x_forwarded_for_ip + return res def _calc_cookies(self, info_dict): @@ -1392,6 +1403,9 @@ class YoutubeDL(object): full_format_info = info_dict.copy() full_format_info.update(format) format['http_headers'] = self._calc_headers(full_format_info) + # Remove private housekeeping stuff + if '__x_forwarded_for_ip' in info_dict: + del info_dict['__x_forwarded_for_ip'] # TODO Central sorting goes here diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 96815099d..c1f7f28a0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -384,7 +384,10 @@ class InfoExtractor(object): for _ in range(2): try: self.initialize() - return self._real_extract(url) + ie_result = self._real_extract(url) + if self._x_forwarded_for_ip: + ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip + return ie_result except GeoRestrictedError as e: if (not self._downloader.params.get('bypass_geo_restriction_as_country', None) and self._BYPASS_GEO and From 0a840f584c3f1fedb6957c05587dec697143f2d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 01:53:41 +0700 Subject: [PATCH 0301/1696] Rename bypass geo restriction options --- youtube_dl/YoutubeDL.py | 5 ++--- youtube_dl/__init__.py | 4 ++-- youtube_dl/extractor/common.py | 8 ++++---- youtube_dl/options.py | 12 ++++++------ 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 1c04e46c1..68000dea2 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -274,10 +274,9 @@ class YoutubeDL(object): If it returns None, the video is downloaded. match_filter_func in utils.py is one example for this. no_color: Do not emit color codes in output. 
- bypass_geo_restriction: - Bypass geographic restriction via faking X-Forwarded-For + geo_bypass: Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental) - bypass_geo_restriction_as_country: + geo_bypass_country: Two-letter ISO 3166-2 country code that will be used for explicit geographic restriction bypassing via faking X-Forwarded-For HTTP header (experimental) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 94f461a78..f91d29a7b 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -414,8 +414,8 @@ def _real_main(argv=None): 'cn_verification_proxy': opts.cn_verification_proxy, 'geo_verification_proxy': opts.geo_verification_proxy, 'config_location': opts.config_location, - 'bypass_geo_restriction': opts.bypass_geo_restriction, - 'bypass_geo_restriction_as_country': opts.bypass_geo_restriction_as_country, + 'geo_bypass': opts.geo_bypass, + 'geo_bypass_country': opts.geo_bypass_country, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c1f7f28a0..6eb6a25b8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -326,7 +326,7 @@ class InfoExtractor(object): _BYPASS_GEO attribute may be set to False in order to disable geo restriction bypass mechanisms for a particular extractor. Though it won't disable explicit geo restriction bypass based on - country code provided with bypass_geo_restriction_as_country. + country code provided with geo_bypass_country. Finally, the _WORKING attribute should be set to False for broken IEs in order to warn the users and skip the tests. @@ -371,7 +371,7 @@ class InfoExtractor(object): def initialize(self): """Initializes an instance (authentication, etc).""" if not self._x_forwarded_for_ip: - country_code = self._downloader.params.get('bypass_geo_restriction_as_country', None) + country_code = self._downloader.params.get('geo_bypass_country', None) if country_code: self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) if not self._ready: @@ -389,9 +389,9 @@ class InfoExtractor(object): ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip return ie_result except GeoRestrictedError as e: - if (not self._downloader.params.get('bypass_geo_restriction_as_country', None) and + if (not self._downloader.params.get('geo_bypass_country', None) and self._BYPASS_GEO and - self._downloader.params.get('bypass_geo_restriction', True) and + self._downloader.params.get('geo_bypass', True) and not self._x_forwarded_for_ip and e.countries): self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries)) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 2e194f6dc..ae3f50754 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -550,16 +550,16 @@ def parseOpts(overrideArguments=None): '(maximum possible number of seconds to sleep). 
Must only be used ' 'along with --min-sleep-interval.')) workarounds.add_option( - '--bypass-geo', - action='store_true', dest='bypass_geo_restriction', default=True, + '--geo-bypass', + action='store_true', dest='geo_bypass', default=True, help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') workarounds.add_option( - '--no-bypass-geo', - action='store_false', dest='bypass_geo_restriction', default=True, + '--no-geo-bypass', + action='store_false', dest='geo_bypass', default=True, help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') workarounds.add_option( - '--bypass-geo-as-country', metavar='CODE', - dest='bypass_geo_restriction_as_country', default=None, + '--geo-bypass-country', metavar='CODE', + dest='geo_bypass_country', default=None, help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') From 4248dad92bd87650c791194276296b148f668e68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 03:53:23 +0700 Subject: [PATCH 0302/1696] Improve geo bypass mechanism * Rename options to preffixly match with --geo-verification-proxy * Introduce _GEO_COUNTRIES for extractors * Implement faking IP right away for sites with known geo restriction --- youtube_dl/extractor/common.py | 57 +++++++++++++++++++-------- youtube_dl/extractor/dramafever.py | 3 +- youtube_dl/extractor/go.py | 3 +- youtube_dl/extractor/itv.py | 4 +- youtube_dl/extractor/nrk.py | 4 +- youtube_dl/extractor/ondemandkorea.py | 3 +- youtube_dl/extractor/pbs.py | 5 ++- youtube_dl/extractor/srgssr.py | 6 ++- youtube_dl/extractor/svt.py | 4 +- youtube_dl/extractor/vbox7.py | 3 +- youtube_dl/extractor/vgtv.py | 5 ++- youtube_dl/extractor/viki.py | 2 +- youtube_dl/utils.py | 2 +- 13 files changed, 71 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6eb6a25b8..272da74b6 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -323,10 +323,15 @@ class InfoExtractor(object): _real_extract() methods and define a _VALID_URL regexp. Probably, they should also be added to the list of extractors. - _BYPASS_GEO attribute may be set to False in order to disable + _GEO_BYPASS attribute may be set to False in order to disable geo restriction bypass mechanisms for a particular extractor. Though it won't disable explicit geo restriction bypass based on - country code provided with geo_bypass_country. + country code provided with geo_bypass_country. (experimental) + + _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted + countries for this extractor. One of these countries will be used by + geo restriction bypass mechanism right away in order to bypass + geo restriction, of course, if the mechanism is not disabled. (experimental) Finally, the _WORKING attribute should be set to False for broken IEs in order to warn the users and skip the tests. 
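Taken together, the two attributes documented above are the whole opt-in surface an extractor needs. As a rough illustration only — the site, URL pattern and JSON field names below are invented, while _GEO_COUNTRIES, _GEO_BYPASS and raise_geo_restricted() are the real APIs introduced in this series — a geo-aware extractor could look like:

    from .common import InfoExtractor


    class SomeGeoAwareIE(InfoExtractor):
        # Hypothetical extractor, shown only to illustrate the new attributes.
        _VALID_URL = r'https?://(?:www\.)?example\.com/video/(?P<id>\d+)'
        # Countries presumed unblocked; on initialize() a random IP from one
        # of them is faked as X-Forwarded-For, unless bypassing is disabled.
        _GEO_COUNTRIES = ['SE']
        # Extractors where faking X-Forwarded-For is known not to help would
        # instead set: _GEO_BYPASS = False

        def _real_extract(self, url):
            video_id = self._match_id(url)
            info = self._download_json(
                'https://example.com/api/video/%s' % video_id, video_id)
            if info.get('geoblocked'):
                # Reports the restriction and, if no fake IP was in play yet,
                # lets extract() retry with an IP from the given countries.
                self.raise_geo_restricted(
                    'This video is only available in Sweden',
                    countries=self._GEO_COUNTRIES)
            return {
                'id': video_id,
                'title': info['title'],
                'url': info['stream_url'],
            }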
@@ -335,7 +340,8 @@ class InfoExtractor(object): _ready = False _downloader = None _x_forwarded_for_ip = None - _BYPASS_GEO = True + _GEO_BYPASS = True + _GEO_COUNTRIES = None _WORKING = True def __init__(self, downloader=None): @@ -370,13 +376,27 @@ class InfoExtractor(object): def initialize(self): """Initializes an instance (authentication, etc).""" + self.__initialize_geo_bypass() + if not self._ready: + self._real_initialize() + self._ready = True + + def __initialize_geo_bypass(self): if not self._x_forwarded_for_ip: country_code = self._downloader.params.get('geo_bypass_country', None) + # If there is no explicit country for geo bypass specified and + # the extractor is known to be geo restricted let's fake IP + # as X-Forwarded-For right away. + if (not country_code and + self._GEO_BYPASS and + self._downloader.params.get('geo_bypass', True) and + self._GEO_COUNTRIES): + country_code = random.choice(self._GEO_COUNTRIES) if country_code: self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) - if not self._ready: - self._real_initialize() - self._ready = True + if self._downloader.params.get('verbose', False): + self._downloader.to_stdout( + '[debug] Using fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) def extract(self, url): """Extracts URL information and returns it in list of dicts.""" @@ -389,16 +409,8 @@ class InfoExtractor(object): ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip return ie_result except GeoRestrictedError as e: - if (not self._downloader.params.get('geo_bypass_country', None) and - self._BYPASS_GEO and - self._downloader.params.get('geo_bypass', True) and - not self._x_forwarded_for_ip and - e.countries): - self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries)) - if self._x_forwarded_for_ip: - self.report_warning( - 'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) - continue + if self.__maybe_fake_ip_and_retry(e.countries): + continue raise except ExtractorError: raise @@ -407,6 +419,19 @@ class InfoExtractor(object): except (KeyError, StopIteration) as e: raise ExtractorError('An extractor error has occurred.', cause=e) + def __maybe_fake_ip_and_retry(self, countries): + if (not self._downloader.params.get('geo_bypass_country', None) and + self._GEO_BYPASS and + self._downloader.params.get('geo_bypass', True) and + not self._x_forwarded_for_ip and + countries): + self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(countries)) + if self._x_forwarded_for_ip: + self.report_warning( + 'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' 
% self._x_forwarded_for_ip) + return True + return False + def set_downloader(self, downloader): """Sets the downloader for this IE.""" self._downloader = downloader diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 755db806a..e7abc8889 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -20,6 +20,7 @@ from ..utils import ( class DramaFeverBaseIE(AMPIE): _LOGIN_URL = 'https://www.dramafever.com/accounts/login/' _NETRC_MACHINE = 'dramafever' + _GEO_COUNTRIES = ['US', 'CA'] _CONSUMER_SECRET = 'DA59dtVXYLxajktV' @@ -118,7 +119,7 @@ class DramaFeverIE(DramaFeverBaseIE): if isinstance(e.cause, compat_HTTPError): self.raise_geo_restricted( msg='Currently unavailable in your country', - countries=['US', 'CA']) + countries=self._GEO_COUNTRIES) raise series_id, episode_number = video_id.split('.') diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index ec902c670..b205bfc7c 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -37,6 +37,7 @@ class GoIE(AdobePassIE): } } _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys()) + _GEO_COUNTRIES = ['US'] _TESTS = [{ 'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', 'info_dict': { @@ -104,7 +105,7 @@ class GoIE(AdobePassIE): for error in errors: if error.get('code') == 1002: self.raise_geo_restricted( - error['message'], countries=['US']) + error['message'], countries=self._GEO_COUNTRIES) error_message = ', '.join([error['message'] for error in errors]) raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) asset_url += '?' + entitlement['uplynkData']['sessionKey'] diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py index aabde15f3..021c6b278 100644 --- a/youtube_dl/extractor/itv.py +++ b/youtube_dl/extractor/itv.py @@ -24,6 +24,7 @@ from ..utils import ( class ITVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)' + _GEO_COUNTRIES = ['GB'] _TEST = { 'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053', 'info_dict': { @@ -101,7 +102,8 @@ class ITVIE(InfoExtractor): fault_code = xpath_text(resp_env, './/faultcode') fault_string = xpath_text(resp_env, './/faultstring') if fault_code == 'InvalidGeoRegion': - self.raise_geo_restricted(msg=fault_string, countries=['GB']) + self.raise_geo_restricted( + msg=fault_string, countries=self._GEO_COUNTRIES) raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string)) title = xpath_text(playlist, 'EpisodeTitle', fatal=True) video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 78ece33e1..13af9ed1f 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -14,6 +14,7 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): + _GEO_COUNTRIES = ['NO'] def _real_extract(self, url): video_id = self._match_id(url) @@ -93,7 +94,8 @@ class NRKBaseIE(InfoExtractor): # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked* if 'IsGeoBlocked' in message_type: self.raise_geo_restricted( - msg=MESSAGES.get('ProgramIsGeoBlocked'), countries=['NO']) + msg=MESSAGES.get('ProgramIsGeoBlocked'), + countries=self._GEO_COUNTRIES) raise ExtractorError( '%s said: %s' % (self.IE_NAME, MESSAGES.get( message_type, message_type)), diff --git a/youtube_dl/extractor/ondemandkorea.py 
b/youtube_dl/extractor/ondemandkorea.py index 0c85d549e..df1ce3c1d 100644 --- a/youtube_dl/extractor/ondemandkorea.py +++ b/youtube_dl/extractor/ondemandkorea.py @@ -10,6 +10,7 @@ from ..utils import ( class OnDemandKoreaIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html' + _GEO_COUNTRIES = ['US', 'CA'] _TEST = { 'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html', 'info_dict': { @@ -36,7 +37,7 @@ class OnDemandKoreaIE(InfoExtractor): if 'msg_block_01.png' in webpage: self.raise_geo_restricted( msg='This content is not available in your region', - countries=['US', 'CA']) + countries=self._GEO_COUNTRIES) if 'This video is only available to ODK PLUS members.' in webpage: raise ExtractorError( diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 64f47bae3..3e51b4dd7 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -193,6 +193,8 @@ class PBSIE(InfoExtractor): ) ''' % '|'.join(list(zip(*_STATIONS))[0]) + _GEO_COUNTRIES = ['US'] + _TESTS = [ { 'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/', @@ -492,7 +494,8 @@ class PBSIE(InfoExtractor): message = self._ERRORS.get( redirect_info['http_code'], redirect_info['message']) if redirect_info['http_code'] == 403: - self.raise_geo_restricted(msg=message, countries=['US']) + self.raise_geo_restricted( + msg=message, countries=self._GEO_COUNTRIES) raise ExtractorError( '%s said: %s' % (self.IE_NAME, message), expected=True) diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index a35a0a538..bb73eb1d5 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ -14,7 +14,8 @@ from ..utils import ( class SRGSSRIE(InfoExtractor): _VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)' - _BYPASS_GEO = False + _GEO_BYPASS = False + _GEO_COUNTRIES = ['CH'] _ERRORS = { 'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. 
and 6 a.m.', @@ -43,7 +44,8 @@ class SRGSSRIE(InfoExtractor): if media_data.get('block') and media_data['block'] in self._ERRORS: message = self._ERRORS[media_data['block']] if media_data['block'] == 'GEOBLOCK': - self.raise_geo_restricted(msg=message, countries=['CH']) + self.raise_geo_restricted( + msg=message, countries=self._GEO_COUNTRIES) raise ExtractorError( '%s said: %s' % (self.IE_NAME, message), expected=True) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index f2a2200bf..9e2c9fcc6 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -13,6 +13,7 @@ from ..utils import ( class SVTBaseIE(InfoExtractor): + _GEO_COUNTRIES = ['SE'] def _extract_video(self, video_info, video_id): formats = [] for vr in video_info['videoReferences']: @@ -39,7 +40,8 @@ class SVTBaseIE(InfoExtractor): }) if not formats and video_info.get('rights', {}).get('geoBlockedSweden'): self.raise_geo_restricted( - 'This video is only available in Sweden', countries=['SE']) + 'This video is only available in Sweden', + countries=self._GEO_COUNTRIES) self._sort_formats(formats) subtitles = {} diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index f86d804c1..8152acefd 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -20,6 +20,7 @@ class Vbox7IE(InfoExtractor): ) (?P<id>[\da-fA-F]+) ''' + _GEO_COUNTRIES = ['BG'] _TESTS = [{ 'url': 'http://vbox7.com/play:0946fff23c', 'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf', @@ -78,7 +79,7 @@ class Vbox7IE(InfoExtractor): video_url = video['src'] if '/na.mp4' in video_url: - self.raise_geo_restricted(countries=['BG']) + self.raise_geo_restricted(countries=self._GEO_COUNTRIES) uploader = video.get('uploader') diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 1709fd6bb..0f8c156a7 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -14,7 +14,7 @@ from ..utils import ( class VGTVIE(XstreamIE): IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet' - _BYPASS_GEO = False + _GEO_BYPASS = False _HOST_TO_APPNAME = { 'vgtv.no': 'vgtv', @@ -218,7 +218,8 @@ class VGTVIE(XstreamIE): properties = try_get( data, lambda x: x['streamConfiguration']['properties'], list) if properties and 'geoblocked' in properties: - raise self.raise_geo_restricted(countries=['NO']) + raise self.raise_geo_restricted( + countries=[host.rpartition('.')[-1].partition('/')[0].upper()]) self._sort_formats(info['formats']) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 68a74e246..e9c8bf824 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -27,7 +27,7 @@ class VikiBaseIE(InfoExtractor): _APP_VERSION = '2.2.5.1428709186' _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)' - _BYPASS_GEO = False + _GEO_BYPASS = False _NETRC_MACHINE = 'viki' _token = None diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index cbf7639c5..17b83794a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3291,7 +3291,7 @@ class GeoUtils(object): addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0] addr_max = addr_min | (0xffffffff >> int(preflen)) return compat_str(socket.inet_ntoa( - compat_struct_pack('!I', random.randint(addr_min, addr_max)))) + compat_struct_pack('!L', random.randint(addr_min, addr_max)))) class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): From 0aa10994f452b4ca978baf124df0cb2239d49305 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 03:58:17 +0700 Subject: [PATCH 0303/1696] [options] Move geo restriction related options to separate section --- youtube_dl/options.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index ae3f50754..2c880d06a 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -228,17 +228,29 @@ def parseOpts(overrideArguments=None): action='store_const', const='::', dest='source_address', help='Make all connections via IPv6', ) - network.add_option( + + geo = optparse.OptionGroup(parser, 'Geo Restriction') + geo.add_option( '--geo-verification-proxy', dest='geo_verification_proxy', default=None, metavar='URL', help='Use this proxy to verify the IP address for some geo-restricted sites. ' - 'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading.' - ) - network.add_option( + 'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading.') + geo.add_option( '--cn-verification-proxy', dest='cn_verification_proxy', default=None, metavar='URL', - help=optparse.SUPPRESS_HELP, - ) + help=optparse.SUPPRESS_HELP) + geo.add_option( + '--geo-bypass', + action='store_true', dest='geo_bypass', default=True, + help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') + geo.add_option( + '--no-geo-bypass', + action='store_false', dest='geo_bypass', default=True, + help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') + geo.add_option( + '--geo-bypass-country', metavar='CODE', + dest='geo_bypass_country', default=None, + help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)') selection = optparse.OptionGroup(parser, 'Video Selection') selection.add_option( @@ -549,18 +561,6 @@ def parseOpts(overrideArguments=None): 'Upper bound of a range for randomized sleep before each download ' '(maximum possible number of seconds to sleep). 
Must only be used ' 'along with --min-sleep-interval.')) - workarounds.add_option( - '--geo-bypass', - action='store_true', dest='geo_bypass', default=True, - help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') - workarounds.add_option( - '--no-geo-bypass', - action='store_false', dest='geo_bypass', default=True, - help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)') - workarounds.add_option( - '--geo-bypass-country', metavar='CODE', - dest='geo_bypass_country', default=None, - help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)') verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') verbosity.add_option( @@ -848,6 +848,7 @@ def parseOpts(overrideArguments=None): parser.add_option_group(general) parser.add_option_group(network) + parser.add_option_group(geo) parser.add_option_group(selection) parser.add_option_group(downloader) parser.add_option_group(filesystem) From 553f6dbac7afac84994eae18f551799f807d1503 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 04:18:22 +0700 Subject: [PATCH 0304/1696] [downloader/dash] Honor HTTP headers when downloading fragments For example, https://www.oppetarkiv.se/video/1196142/natten-ar-dagens-mor --- youtube_dl/downloader/dash.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 8437dde30..e2ddc369e 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -43,7 +43,10 @@ class DashSegmentsFD(FragmentFD): count = 0 while count <= fragment_retries: try: - success = ctx['dl'].download(target_filename, {'url': segment_url}) + success = ctx['dl'].download(target_filename, { + 'url': segment_url, + 'http_headers': info_dict.get('http_headers'), + }) if not success: return False down, target_sanitized = sanitize_open(target_filename, 'rb') From de64e23c5663ceb4f62264077a7993d13ace0d6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 04:18:36 +0700 Subject: [PATCH 0305/1696] [downloader/ism] Honor HTTP headers when downloading fragments --- youtube_dl/downloader/ism.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py index 93cac5e98..63a636cb7 100644 --- a/youtube_dl/downloader/ism.py +++ b/youtube_dl/downloader/ism.py @@ -238,7 +238,10 @@ class IsmFD(FragmentFD): count = 0 while count <= fragment_retries: try: - success = ctx['dl'].download(target_filename, {'url': segment_url}) + success = ctx['dl'].download(target_filename, { + 'url': segment_url, + 'http_headers': info_dict.get('http_headers'), + }) if not success: return False down, target_sanitized = sanitize_open(target_filename, 'rb') From f1a78ee4ef3bfd8e7ff06a3014d96c3cf11b4d9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 06:16:00 +0700 Subject: [PATCH 0306/1696] [tv4] Switch to hls3 protocol (closes #12177) --- youtube_dl/extractor/tv4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index ad79db92b..7918e3d86 100644 --- a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -80,7 +80,7 @@ class TV4IE(InfoExtractor): subtitles = {} formats = [] # http formats are linked with 
unresolvable host - for kind in ('hls', ''): + for kind in ('hls3', ''): data = self._download_json( 'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id, video_id, 'Downloading sources JSON', query={ From c58b7ffef43f60fa6a183c849cfdca42e36eae0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Feb 2017 06:24:38 +0700 Subject: [PATCH 0307/1696] [tv4] Bypass geo restriction and improve detection --- youtube_dl/extractor/tv4.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index 7918e3d86..7aeb2c620 100644 --- a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -24,6 +24,7 @@ class TV4IE(InfoExtractor): sport/| ) )(?P<id>[0-9]+)''' + _GEO_COUNTRIES = ['SE'] _TESTS = [ { 'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650', @@ -71,10 +72,6 @@ class TV4IE(InfoExtractor): 'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON') - # If is_geo_restricted is true, it doesn't necessarily mean we can't download it - if info.get('is_geo_restricted'): - self.report_warning('This content might not be available in your country due to licensing restrictions.') - title = info['title'] subtitles = {} @@ -113,6 +110,10 @@ class TV4IE(InfoExtractor): 'url': manifest_url, 'ext': 'vtt', }]}) + + if not formats and info.get('is_geo_restricted'): + self.raise_geo_restricted(countries=self._GEO_COUNTRIES) + self._sort_formats(formats) return { From 8936f68a0ba3284c88ec619fb4cc22eb0499e7f3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 21 Oct 2015 00:37:28 +0800 Subject: [PATCH 0308/1696] [travis] Run tests in parallel [test_download] Print test names in case of network errors [test_download] Add comments for nose parameters [test_download] Modify outtmpl to prevent info JSON filename conflicts Thanks @jaimeMF for the idea. [travis] Only download tests should be run in parallel --- devscripts/run_tests.sh | 4 +++- test/test_download.py | 11 ++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh index 7f4c1e083..c60807215 100755 --- a/devscripts/run_tests.sh +++ b/devscripts/run_tests.sh @@ -3,6 +3,7 @@ DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter" test_set="" +multiprocess_args="" case "$YTDL_TEST_SET" in core) @@ -10,10 +11,11 @@ case "$YTDL_TEST_SET" in ;; download) test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py" + multiprocess_args="--processes=4 --process-timeout=540" ;; *) break ;; esac -nosetests test --verbose $test_set +nosetests test --verbose $test_set $multiprocess_args diff --git a/test/test_download.py b/test/test_download.py index 463952989..30034f978 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -65,6 +65,10 @@ defs = gettestcases() class TestDownload(unittest.TestCase): + # Parallel testing in nosetests. 
See + # http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html + _multiprocess_shared_ = True + maxDiff = None def setUp(self): @@ -73,7 +77,7 @@ class TestDownload(unittest.TestCase): # Dynamically generate tests -def generator(test_case): +def generator(test_case, tname): def test_template(self): ie = youtube_dl.extractor.get_info_extractor(test_case['name']) @@ -102,6 +106,7 @@ def generator(test_case): return params = get_params(test_case.get('params', {})) + params['outtmpl'] = tname + '_' + params['outtmpl'] if is_playlist and 'playlist' not in test_case: params.setdefault('extract_flat', 'in_playlist') params.setdefault('skip_download', True) @@ -146,7 +151,7 @@ def generator(test_case): raise if try_num == RETRIES: - report_warning('Failed due to network errors, skipping...') + report_warning('%s failed due to network errors, skipping...' % tname) return print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num)) @@ -221,12 +226,12 @@ def generator(test_case): # And add them to TestDownload for n, test_case in enumerate(defs): - test_method = generator(test_case) tname = 'test_' + str(test_case['name']) i = 1 while hasattr(TestDownload, tname): tname = 'test_%s_%d' % (test_case['name'], i) i += 1 + test_method = generator(test_case, tname) test_method.__name__ = str(tname) setattr(TestDownload, test_method.__name__, test_method) del test_method From 983e9b774643fc588fbfb51d314381025ffac248 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 00:59:31 +0700 Subject: [PATCH 0309/1696] [nrk] Update _API_HOST and relax _VALID_URL --- youtube_dl/extractor/nrk.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 13af9ed1f..7b98626f2 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -164,12 +164,12 @@ class NRKIE(NRKBaseIE): https?:// (?: (?:www\.)?nrk\.no/video/PS\*| - v8-psapi\.nrk\.no/mediaelement/ + v8[-.]psapi\.nrk\.no/mediaelement/ ) ) - (?P<id>[^/?#&]+) + (?P<id>[^?#&]+) ''' - _API_HOST = 'v8.psapi.nrk.no' + _API_HOST = 'v8-psapi.nrk.no' _TESTS = [{ # video 'url': 'http://www.nrk.no/video/PS*150533', @@ -195,6 +195,9 @@ class NRKIE(NRKBaseIE): }, { 'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9', 'only_matching': True, + }, { + 'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70', + 'only_matching': True, }, { 'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9', 'only_matching': True, From 8ffb8e63fe2853f9e51420ba224db428f1241c35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 01:00:53 +0700 Subject: [PATCH 0310/1696] [prosiebensat1] Throw ExtractionError on unsupported page type (closes #12180) --- youtube_dl/extractor/prosiebensat1.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 5091d8456..1245309a7 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -424,3 +424,6 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): return self._extract_clip(url, webpage) elif page_type == 'playlist': return self._extract_playlist(url, webpage) + else: + raise ExtractorError( + 'Unsupported page type %s' % page_type, expected=True) From c78dd3549155d4cb8f70707c1b4085f9f974db2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 
Feb 2017 02:25:39 +0700 Subject: [PATCH 0311/1696] [nrk] PEP 8 --- youtube_dl/extractor/nrk.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 7b98626f2..7fe79cb53 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -15,6 +15,7 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): _GEO_COUNTRIES = ['NO'] + def _real_extract(self, url): video_id = self._match_id(url) From 6d4c259765de86bdb8a10e71bfbc7b6e196f6967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 02:25:55 +0700 Subject: [PATCH 0312/1696] [svt] PEP 8 --- youtube_dl/extractor/svt.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 9e2c9fcc6..1b5afb73e 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -14,6 +14,7 @@ from ..utils import ( class SVTBaseIE(InfoExtractor): _GEO_COUNTRIES = ['SE'] + def _extract_video(self, video_info, video_id): formats = [] for vr in video_info['videoReferences']: From 2cc7fcd338e8690a5c211b95fb9e0dcdc5d98ef5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 03:06:52 +0700 Subject: [PATCH 0313/1696] [commonmistakes] Disable UnicodeBOM extractor test for python 3.2 --- youtube_dl/extractor/commonmistakes.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/commonmistakes.py b/youtube_dl/extractor/commonmistakes.py index d3ed4a9a4..79f7a9cd1 100644 --- a/youtube_dl/extractor/commonmistakes.py +++ b/youtube_dl/extractor/commonmistakes.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import sys + from .common import InfoExtractor from ..utils import ExtractorError @@ -33,7 +35,9 @@ class UnicodeBOMIE(InfoExtractor): IE_DESC = False _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$' - _TESTS = [{ + # Disable test for python 3.2 since BOM is broken in re in this version + # (see https://github.com/rg3/youtube-dl/issues/9751) + _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{ 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc', 'only_matching': True, }] From 82f662182b9ade630b37af81ebf8ae7ae6468898 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 23:16:14 +0700 Subject: [PATCH 0314/1696] [iprima] Modernize --- youtube_dl/extractor/iprima.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 0fe576883..7afa2def0 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -8,7 +8,6 @@ from .common import InfoExtractor from ..utils import ( determine_ext, js_to_json, - sanitized_Request, ) @@ -38,11 +37,13 @@ class IPrimaIE(InfoExtractor): video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id') - req = sanitized_Request( - 'http://play.iprima.cz/prehravac/init?_infuse=1' - '&_ts=%s&productId=%s' % (round(time.time()), video_id)) - req.add_header('Referer', url) - playerpage = self._download_webpage(req, video_id, note='Downloading player') + playerpage = self._download_webpage( + 'http://play.iprima.cz/prehravac/init', + video_id, note='Downloading player', query={ + '_infuse': 1, + '_ts': round(time.time()), + 'productId': video_id, + }, headers={'Referer': url}) formats = [] From da42ff066811490064e0c3039b9db5c0e9a69f58 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 23:17:19 +0700 Subject: [PATCH 0315/1696] [iprima] Improve geo restriction detection and disable geo bypass --- youtube_dl/extractor/iprima.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 7afa2def0..a29e6a5ba 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -13,6 +13,7 @@ from ..utils import ( class IPrimaIE(InfoExtractor): _VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)' + _GEO_BYPASS = False _TESTS = [{ 'url': 'http://play.iprima.cz/gondici-s-r-o-33', @@ -28,6 +29,10 @@ class IPrimaIE(InfoExtractor): }, { 'url': 'http://play.iprima.cz/particka/particka-92', 'only_matching': True, + }, { + # geo restricted + 'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1', + 'only_matching': True, }] def _real_extract(self, url): @@ -83,7 +88,7 @@ class IPrimaIE(InfoExtractor): extract_formats(src) if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage: - self.raise_geo_restricted() + self.raise_geo_restricted(countries=['CZ']) self._sort_formats(formats) From 3ccdde8cb76cacb7b2b64469ca51d3b1877da1f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Feb 2017 23:21:15 +0700 Subject: [PATCH 0316/1696] [extractor/common] Emphasize geo bypass APIs are experimental --- youtube_dl/extractor/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 272da74b6..1ae264722 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -333,6 +333,9 @@ class InfoExtractor(object): geo restriction bypass mechanism right away in order to bypass geo restriction, of course, if the mechanism is not disabled. (experimental) + NB: both these geo attributes are experimental and may change in future + or be completely removed. + Finally, the _WORKING attribute should be set to False for broken IEs in order to warn the users and skip the tests. 
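Since both halves of the mechanism are flagged experimental above, it is worth noting how little surface they expose to embedders: the --geo-bypass, --no-geo-bypass and --geo-bypass-country switches map onto the geo_bypass / geo_bypass_country parameters documented in the YoutubeDL.py docstring earlier in this series. A rough, hypothetical sketch (the URL is only a placeholder for some geo-restricted video):

    import youtube_dl

    ydl_opts = {
        # Same as --geo-bypass / --no-geo-bypass (enabled by default).
        'geo_bypass': True,
        # Same as --geo-bypass-country CODE: force a two-letter ISO 3166-2
        # country code instead of relying on an extractor's _GEO_COUNTRIES.
        'geo_bypass_country': 'SE',
    }

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://example.com/some-geo-restricted-video'])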
""" From 6926304472d4598f095abc7115ca0f36068271d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Feb 2017 00:54:43 +0700 Subject: [PATCH 0317/1696] [spankbang] Make uploader optional (closes #12193) --- youtube_dl/extractor/spankbang.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 123c33ac3..3394c7e6b 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -23,6 +23,10 @@ class SpankBangIE(InfoExtractor): # 480p only 'url': 'http://spankbang.com/1vt0/video/solvane+gangbang', 'only_matching': True, + }, { + # no uploader + 'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2', + 'only_matching': True, }] def _real_extract(self, url): @@ -48,7 +52,7 @@ class SpankBangIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) uploader = self._search_regex( r'class="user"[^>]*><img[^>]+>([^<]+)', - webpage, 'uploader', fatal=False) + webpage, 'uploader', default=None) age_limit = self._rta_search(webpage) From 890d44b005c3073442064a847f2e0204619a8b47 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 20 Feb 2017 19:00:06 +0100 Subject: [PATCH 0318/1696] [adobepass] add support for Time Warner Cable(closes #12191) --- youtube_dl/extractor/adobepass.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 12eeab271..4d655bd5e 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -31,6 +31,11 @@ MSO_INFO = { 'username_field': 'user', 'password_field': 'passwd', }, + 'TWC': { + 'name': 'Time Warner Cable | Spectrum', + 'username_field': 'Ecom_User_ID', + 'password_field': 'Ecom_Password', + }, 'thr030': { 'name': '3 Rivers Communications' }, From e469ab25280433781881d0c3ea6fd423ac5fea71 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 21 Feb 2017 14:38:00 +0100 Subject: [PATCH 0319/1696] [ninecninemedia] use geo bypass mechanism --- youtube_dl/extractor/ninecninemedia.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/ninecninemedia.py b/youtube_dl/extractor/ninecninemedia.py index ec4d675e2..d9943fc2c 100644 --- a/youtube_dl/extractor/ninecninemedia.py +++ b/youtube_dl/extractor/ninecninemedia.py @@ -19,6 +19,7 @@ class NineCNineMediaBaseIE(InfoExtractor): class NineCNineMediaStackIE(NineCNineMediaBaseIE): IE_NAME = '9c9media:stack' + _GEO_COUNTRIES = ['CA'] _VALID_URL = r'9c9media:stack:(?P<destination_code>[^:]+):(?P<content_id>\d+):(?P<content_package>\d+):(?P<id>\d+)' def _real_extract(self, url): From e39b5d4ab83de7a466c6d4c9528d385758566b22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Feb 2017 23:00:43 +0700 Subject: [PATCH 0320/1696] [extractor/common] Allow calling _initialize_geo_bypass from extractors (#11970) --- youtube_dl/extractor/common.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1ae264722..86aff3312 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -379,12 +379,31 @@ class InfoExtractor(object): def initialize(self): """Initializes an instance (authentication, etc).""" - self.__initialize_geo_bypass() + self._initialize_geo_bypass(self._GEO_COUNTRIES) if not self._ready: 
self._real_initialize() self._ready = True - def __initialize_geo_bypass(self): + def _initialize_geo_bypass(self, countries): + """ + Initialize geo restriction bypass mechanism. + + This method is used to initialize geo bypass mechanism based on faking + X-Forwarded-For HTTP header. A random country from provided country list + is selected and a random IP brlonging to this country is generated. This + IP will be passed as X-Forwarded-For HTTP header in all subsequent + HTTP requests. + Method does nothing if no countries are specified. + + This method will be used for initial geo bypass mechanism initialization + during the instance initialization with _GEO_COUNTRIES. + + You may also manually call it from extractor's code if geo countries + information is not available beforehand (e.g. obtained during + extraction) or due to some another reason. + """ + if not countries: + return if not self._x_forwarded_for_ip: country_code = self._downloader.params.get('geo_bypass_country', None) # If there is no explicit country for geo bypass specified and @@ -393,8 +412,8 @@ class InfoExtractor(object): if (not country_code and self._GEO_BYPASS and self._downloader.params.get('geo_bypass', True) and - self._GEO_COUNTRIES): - country_code = random.choice(self._GEO_COUNTRIES) + countries): + country_code = random.choice(countries) if country_code: self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) if self._downloader.params.get('verbose', False): From dc0a869e5ee7a75218a759706bb11f17c4de6b72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Feb 2017 23:05:31 +0700 Subject: [PATCH 0321/1696] [extractor/common] Fix typo --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 86aff3312..6d4789d96 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -390,7 +390,7 @@ class InfoExtractor(object): This method is used to initialize geo bypass mechanism based on faking X-Forwarded-For HTTP header. A random country from provided country list - is selected and a random IP brlonging to this country is generated. This + is selected and a random IP belonging to this country is generated. This IP will be passed as X-Forwarded-For HTTP header in all subsequent HTTP requests. Method does nothing if no countries are specified. From 336a76551b92db1c040cbf3c4a9b1857e125ad45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Feb 2017 23:09:41 +0700 Subject: [PATCH 0322/1696] [extractor/common] Do not quit _initialize_geo_bypass on empty countries --- youtube_dl/extractor/common.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6d4789d96..a34fbbc9b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -393,7 +393,6 @@ class InfoExtractor(object): is selected and a random IP belonging to this country is generated. This IP will be passed as X-Forwarded-For HTTP header in all subsequent HTTP requests. - Method does nothing if no countries are specified. This method will be used for initial geo bypass mechanism initialization during the instance initialization with _GEO_COUNTRIES. @@ -402,8 +401,6 @@ class InfoExtractor(object): information is not available beforehand (e.g. obtained during extraction) or due to some another reason. 
""" - if not countries: - return if not self._x_forwarded_for_ip: country_code = self._downloader.params.get('geo_bypass_country', None) # If there is no explicit country for geo bypass specified and From eea0716cae1290fe08faea89e24a58ec91098638 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Feb 2017 23:14:33 +0700 Subject: [PATCH 0323/1696] [extractor/common] Print origin country for fake IP --- youtube_dl/extractor/common.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a34fbbc9b..4252d6825 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -415,7 +415,8 @@ class InfoExtractor(object): self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) if self._downloader.params.get('verbose', False): self._downloader.to_stdout( - '[debug] Using fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) + '[debug] Using fake IP %s (%s) as X-Forwarded-For.' + % (self._x_forwarded_for_ip, country_code.upper())) def extract(self, url): """Extracts URL information and returns it in list of dicts.""" @@ -444,10 +445,12 @@ class InfoExtractor(object): self._downloader.params.get('geo_bypass', True) and not self._x_forwarded_for_ip and countries): - self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(countries)) + country_code = random.choice(countries) + self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) if self._x_forwarded_for_ip: self.report_warning( - 'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip) + 'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.' + % (self._x_forwarded_for_ip, country_code.upper())) return True return False From 159aaaa9d09ce5843ec843d6e10030e229709e17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Feb 2017 23:46:58 +0700 Subject: [PATCH 0324/1696] [ChangeLog] Actualize --- ChangeLog | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/ChangeLog b/ChangeLog index 2c90f791d..2b02994e0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,49 @@ +version <unreleased> + +Core +* [extractor/common] Allow calling _initialize_geo_bypass from extractors + (#11970) ++ [adobepass] Add support for Time Warner Cable (#12191) ++ [travis] Run tests in parallel ++ [downloader/ism] Honor HTTP headers when downloading fragments ++ [downloader/dash] Honor HTTP headers when downloading fragments ++ [utils] Add GeoUtils class for working with geo tools and GeoUtils.random_ipv4 ++ Add option --geo-bypass-country for explicit geo bypass on behalf of + specified country ++ Add options to control geo bypass mechanism --geo-bypass and --no-geo-bypass ++ Add experimental geo restriction bypass mechanism based on faking + X-Forwarded-For HTTP header ++ [utils] Introduce GeoRestrictedError for geo restricted videos ++ [utils] Introduce YoutubeDLError base class for all youtube-dl exceptions + +Extractors ++ [ninecninemedia] Use geo bypass mechanism +* [spankbang] Make uploader optional (#12193) ++ [iprima] Improve geo restriction detection and disable geo bypass +* [iprima] Modernize +* [commonmistakes] Disable UnicodeBOM extractor test for python 3.2 ++ [prosiebensat1] Throw ExtractionError on unsupported page type (#12180) +* [nrk] Update _API_HOST and relax _VALID_URL ++ [tv4] Bypass geo restriction and improve 
detection +* [tv4] Switch to hls3 protocol (#12177) ++ [viki] Improve geo restriction detection ++ [vgtv] Improve geo restriction detection ++ [srgssr] Improve geo restriction detection ++ [vbox7] Improve geo restriction detection and use geo bypass mechanism ++ [svt] Improve geo restriction detection and use geo bypass mechanism ++ [pbs] Improve geo restriction detection and use geo bypass mechanism ++ [ondemandkorea] Improve geo restriction detection and use geo bypass mechanism ++ [nrk] Improve geo restriction detection and use geo bypass mechanism ++ [itv] Improve geo restriction detection and use geo bypass mechanism ++ [go] Improve geo restriction detection and use geo bypass mechanism ++ [dramafever] Improve geo restriction detection and use geo bypass mechanism +* [brightcove:legacy] Restrict videoPlayer value (#12040) ++ [tvn24] Add support for tvn24.pl and tvn24bis.pl (#11679) ++ [thisav] Add support for HTML5 media (#11771) +* [metacafe] Bypass family filter (#10371) +* [viceland] Improve info extraction + + version 2017.02.17 Extractors From 8c6c88c7dae595d5cb7d5926eb00fbaf40103f8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Feb 2017 23:48:24 +0700 Subject: [PATCH 0325/1696] release 2017.02.21 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 34 +++++++++++++++++++++++----------- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 5 files changed, 29 insertions(+), 16 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 6f1361b32..8b6d14fa2 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.17** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.21*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.21** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.17 +[debug] youtube-dl version 2017.02.21 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 2b02994e0..a479d274f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.21 Core * [extractor/common] Allow calling _initialize_geo_bypass from extractors diff --git a/README.md b/README.md index c2a1a6b02..1eccfd287 100644 --- a/README.md +++ b/README.md @@ -99,11 +99,21 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo --source-address IP Client-side IP address to bind to -4, --force-ipv4 Make all connections via IPv4 -6, --force-ipv6 Make all connections via IPv6 + +## Geo Restriction: --geo-verification-proxy URL Use this proxy to verify the IP address for some geo-restricted sites. The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading. + --geo-bypass Bypass geographic restriction via faking + X-Forwarded-For HTTP header (experimental) + --no-geo-bypass Do not bypass geographic restriction via + faking X-Forwarded-For HTTP header + (experimental) + --geo-bypass-country CODE Force bypass geographic restriction with + explicitly provided two-letter ISO 3166-2 + country code (experimental) ## Video Selection: --playlist-start NUMBER Playlist video to start at (default is 1) @@ -140,17 +150,19 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare - against a number, and & to require multiple - matches. Values which are not known are - excluded unless you put a question mark (?) - after the operator. For example, to only - match videos that have been liked more than - 100 times and disliked less than 50 times - (or the dislike functionality is not - available at the given service), but who - also have a description, use --match-filter - "like_count > 100 & dislike_count <? 50 & - description" . + against a number, key = 'LITERAL' (like + "uploader = 'Mike Smith'", also works with + !=) to match against a string literal and & + to require multiple matches. Values which + are not known are excluded unless you put a + question mark (?) after the operator. For + example, to only match videos that have + been liked more than 100 times and disliked + less than 50 times (or the dislike + functionality is not available at the given + service), but who also have a description, + use --match-filter "like_count > 100 & + dislike_count <? 50 & description" . --no-playlist Download only the video, if the URL refers to a video and a playlist. 
--yes-playlist Download the playlist, if the URL refers to diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 5a436e8f7..1eb9c2cdd 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -804,6 +804,7 @@ - **TVCArticle** - **tvigle**: Интернет-телевидение Tvigle.ru - **tvland.com** + - **TVN24** - **TVNoe** - **tvp**: Telewizja Polska - **tvp:embed**: Telewizja Polska diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 530e1856b..a85aebaa3 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.17' +__version__ = '2017.02.21' From 3444844b04ae482edc5a353d9125b45ba47cd8d8 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 21 Feb 2017 17:47:14 +0100 Subject: [PATCH 0326/1696] [limelight] extract PlaylistService errors --- youtube_dl/extractor/limelight.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index a3712665b..422be2528 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -4,11 +4,13 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_HTTPError from ..utils import ( determine_ext, float_or_none, int_or_none, unsmuggle_url, + ExtractorError, ) @@ -20,9 +22,17 @@ class LimelightBaseIE(InfoExtractor): headers = {} if referer: headers['Referer'] = referer - return self._download_json( - self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method), - item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers) + try: + return self._download_json( + self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method), + item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission'] + if error == 'CountryDisabled': + self.raise_geo_restricted() + raise ExtractorError(error, expected=True) + raise def _call_api(self, organization_id, item_id, method): return self._download_json( @@ -213,6 +223,7 @@ class LimelightMediaIE(LimelightBaseIE): def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) + self._initialize_geo_bypass(smuggled_data.get('geo_countries')) pc, mobile, metadata = self._extract( video_id, 'getPlaylistByMediaId', From 33dc173cdc84efbc1f794033480af0e4af459891 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 21 Feb 2017 17:50:36 +0100 Subject: [PATCH 0327/1696] [telequebec] use geo bypass mechanism --- youtube_dl/extractor/telequebec.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py index 4043fcb92..82d73c31d 100644 --- a/youtube_dl/extractor/telequebec.py +++ b/youtube_dl/extractor/telequebec.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + smuggle_url, +) class TeleQuebecIE(InfoExtractor): @@ -28,7 +31,7 @@ class TeleQuebecIE(InfoExtractor): return { '_type': 'url_transparent', 'id': media_id, - 'url': 'limelight:media:' + media_data['streamInfo']['sourceId'], + 
'url': smuggle_url('limelight:media:' + media_data['streamInfo']['sourceId'], {'geo_countries': ['CA']}), 'title': media_data['title'], 'description': media_data.get('descriptions', [{'text': None}])[0].get('text'), 'duration': int_or_none(media_data.get('durationInMilliseconds'), 1000), From 86466a8b6f313d0d8c80823e8e61215ac16046a5 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 21 Feb 2017 17:50:53 +0100 Subject: [PATCH 0328/1696] [cbc] use geo bypass mechanism --- youtube_dl/extractor/cbc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index cf678e7f8..4d93c7744 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -195,6 +195,7 @@ class CBCPlayerIE(InfoExtractor): class CBCWatchBaseIE(InfoExtractor): _device_id = None _device_token = None + _GEO_COUNTRIES = ['CA'] _API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/' _NS_MAP = { 'media': 'http://search.yahoo.com/mrss/', From 7345d6d465c4889ae06672a11f8b6e491b0b7fe4 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 21 Feb 2017 17:51:40 +0100 Subject: [PATCH 0329/1696] [tfo] Improve geo restriction detection and use geo bypass mechanism --- youtube_dl/extractor/tfo.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tfo.py b/youtube_dl/extractor/tfo.py index 6f1eeac57..0e2370cd8 100644 --- a/youtube_dl/extractor/tfo.py +++ b/youtube_dl/extractor/tfo.py @@ -8,10 +8,12 @@ from ..utils import ( HEADRequest, ExtractorError, int_or_none, + clean_html, ) class TFOIE(InfoExtractor): + _GEO_COUNTRIES = ['CA'] _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P<id>\d+)' _TEST = { 'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon', @@ -36,7 +38,9 @@ class TFOIE(InfoExtractor): 'X-tfo-session': self._get_cookies('http://www.tfo.org/')['tfo-session'].value, }) if infos.get('success') == 0: - raise ExtractorError('%s said: %s' % (self.IE_NAME, infos['msg']), expected=True) + if infos.get('code') == 'ErrGeoBlocked': + self.raise_geo_restricted(countries=self._GEO_COUNTRIES) + raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(infos['msg'])), expected=True) video_data = infos['data'] return { From fc320a40d97ed0c439a8d2ace9cfad94a74ff635 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 21 Feb 2017 18:14:55 +0100 Subject: [PATCH 0330/1696] Revert "[cbc] use geo bypass mechanism" This reverts commit 86466a8b6f313d0d8c80823e8e61215ac16046a5. 
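The geo-bypass plumbing shared by the limelight/telequebec/tfo patches above is easiest to see in isolation: the wrapping extractor smuggles the allowed countries into the URL it delegates, and the receiving extractor unsmuggles them and feeds them to the bypass machinery. A minimal sketch with a made-up media id, using only the two helpers from youtube_dl.utils:

    from youtube_dl.utils import smuggle_url, unsmuggle_url

    # What TeleQuebecIE now hands over to the Limelight extractor:
    wrapped = smuggle_url('limelight:media:SOME_SOURCE_ID', {'geo_countries': ['CA']})

    # What LimelightMediaIE._real_extract() gets back out of it:
    url, smuggled_data = unsmuggle_url(wrapped, {})
    print(url)                                 # 'limelight:media:SOME_SOURCE_ID'
    print(smuggled_data.get('geo_countries'))  # ['CA']
    # _initialize_geo_bypass(['CA']) then fakes an X-Forwarded-For header for a
    # Canadian address unless the user passed --no-geo-bypass.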
--- youtube_dl/extractor/cbc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 4d93c7744..cf678e7f8 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -195,7 +195,6 @@ class CBCPlayerIE(InfoExtractor): class CBCWatchBaseIE(InfoExtractor): _device_id = None _device_token = None - _GEO_COUNTRIES = ['CA'] _API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/' _NS_MAP = { 'media': 'http://search.yahoo.com/mrss/', From 31615ac279ac60fbd3925995de2eed69a4b3976a Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 21 Feb 2017 19:36:39 +0100 Subject: [PATCH 0331/1696] [viewster] use geo verifcation headers --- youtube_dl/extractor/viewster.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index 52dd95e2f..fcf0cb100 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -86,7 +86,9 @@ class ViewsterIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) # Get 'api_token' cookie - self._request_webpage(HEADRequest('http://www.viewster.com/'), video_id) + self._request_webpage( + HEADRequest('http://www.viewster.com/'), + video_id, headers=self.geo_verification_headers()) cookies = self._get_cookies('http://www.viewster.com/') self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value) From abd29a2cedaab096e920ed5be9c480921cfacf0a Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 21 Feb 2017 19:37:26 +0100 Subject: [PATCH 0332/1696] [crackle] use geo bypass mechanism --- youtube_dl/extractor/crackle.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py index 377fb45e9..f919ed208 100644 --- a/youtube_dl/extractor/crackle.py +++ b/youtube_dl/extractor/crackle.py @@ -6,6 +6,7 @@ from ..utils import int_or_none class CrackleIE(InfoExtractor): + _GEO_COUNTRIES = ['US'] _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' _TEST = { 'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934', From 139d8ac106dc173d27cf20361b649c0dbc5f9b67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Feb 2017 01:50:34 +0700 Subject: [PATCH 0333/1696] [setup] Add python 3.6 classifier --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index ce6dd1870..b8c3e0925 100644 --- a/setup.py +++ b/setup.py @@ -130,6 +130,7 @@ setup( 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', ], cmdclass={'build_lazy_extractors': build_lazy_extractors}, From 0d427c83047778d2984df5594b96f119ec7f8771 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Feb 2017 01:51:27 +0700 Subject: [PATCH 0334/1696] [setup] Actualize maintainer info --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index b8c3e0925..67d6633ed 100644 --- a/setup.py +++ b/setup.py @@ -107,8 +107,8 @@ setup( url='https://github.com/rg3/youtube-dl', author='Ricardo Garcia', author_email='ytdl@yt-dl.org', - maintainer='Philipp Hagemeister', - maintainer_email='phihag@phihag.de', + maintainer='Sergey M.', + maintainer_email='dstftw@gmail.com', 
packages=[ 'youtube_dl', 'youtube_dl.extractor', 'youtube_dl.downloader', From 71e9577b94a4792a330e9bdab4674c6893ea5bac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Feb 2017 21:19:52 +0700 Subject: [PATCH 0335/1696] [24video] Add support for 24video.tube (closes #12217) --- youtube_dl/extractor/twentyfourvideo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index a983ebf05..f3541b654 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -12,7 +12,7 @@ from ..utils import ( class TwentyFourVideoIE(InfoExtractor): IE_NAME = '24video' - _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex|tube)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.24video.net/video/view/1044982', @@ -37,6 +37,9 @@ class TwentyFourVideoIE(InfoExtractor): }, { 'url': 'http://www.24video.me/video/view/1044982', 'only_matching': True, + }, { + 'url': 'http://www.24video.tube/video/view/2363750', + 'only_matching': True, }] def _real_extract(self, url): From a86e4160880e0747c5a0b774fe8d3b0cbb2990ae Mon Sep 17 00:00:00 2001 From: Tobias Florek <me@ibotty.net> Date: Wed, 22 Feb 2017 15:28:09 +0100 Subject: [PATCH 0336/1696] [vidzi] Add support for vidzi.cc --- youtube_dl/extractor/vidzi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py index 1f1828fce..b642caf22 100644 --- a/youtube_dl/extractor/vidzi.py +++ b/youtube_dl/extractor/vidzi.py @@ -13,7 +13,7 @@ from ..utils import ( class VidziIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' + _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' _TESTS = [{ 'url': 'http://vidzi.tv/cghql9yq6emu.html', 'md5': '4f16c71ca0c8c8635ab6932b5f3f1660', From 58ad6995cd3974eb9cff64f3fa8d34ec68cb6a03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Feb 2017 21:29:53 +0700 Subject: [PATCH 0337/1696] [vidzi] Add test for #12213 --- youtube_dl/extractor/vidzi.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py index b642caf22..d0556297e 100644 --- a/youtube_dl/extractor/vidzi.py +++ b/youtube_dl/extractor/vidzi.py @@ -29,6 +29,9 @@ class VidziIE(InfoExtractor): }, { 'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html', 'skip_download': True, + }, { + 'url': 'http://vidzi.cc/cghql9yq6emu.html', + 'skip_download': True, }] def _real_extract(self, url): From 527ef85fe9bc65ed676ab855ee386c7cce8716ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Feb 2017 21:49:30 +0700 Subject: [PATCH 0338/1696] [dailymotion] Make comment count optional (closes #12209) Not served anymore --- youtube_dl/extractor/dailymotion.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 31bf5faf6..b312401dc 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -66,7 +66,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'uploader_id': 'xijv66', 'age_limit': 0, 'view_count': int, - 
'comment_count': int, } }, # Vevo video @@ -140,7 +139,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): view_count = str_to_int(view_count_str) comment_count = int_or_none(self._search_regex( r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"', - webpage, 'comment count', fatal=False)) + webpage, 'comment count', default=None)) player_v5 = self._search_regex( [r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826 From b5869560a44caaa06b317302425fb472169c2d28 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 23 Feb 2017 00:08:45 +0800 Subject: [PATCH 0339/1696] [crunchyroll] Fix descriptions with double quotes (closes #12124) --- ChangeLog | 6 ++++++ youtube_dl/extractor/crunchyroll.py | 23 +++++++++++++++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index a479d274f..e57e7fece 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [crunchyroll] Fix descriptions with double quotes (#12124) + + version 2017.02.21 Core diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 109d1c5a8..d2b87442d 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -123,7 +123,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', 'info_dict': { 'id': '645513', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!', 'description': 'md5:2d17137920c64f2f49981a7797d275ef', 'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg', @@ -192,6 +192,21 @@ class CrunchyrollIE(CrunchyrollBaseIE): # geo-restricted (US), 18+ maturity wall, non-premium available 'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617', 'only_matching': True, + }, { + # A description with double quotes + 'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080', + 'info_dict': { + 'id': '535080', + 'ext': 'mp4', + 'title': '11eyes Episode 1 – Piros éjszaka - Red Night', + 'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".', + 'uploader': 'Marvelous AQL Inc.', + 'upload_date': '20091021', + }, + 'params': { + # Just test metadata extraction + 'skip_download': True, + }, }] _FORMAT_IDS = { @@ -362,9 +377,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>', webpage, 'video_title') video_title = re.sub(r' {2,}', ' ', video_title) - video_description = self._html_search_regex( - r'<script[^>]*>\s*.+?\[media_id=%s\].+?"description"\s*:\s*"([^"]+)' % video_id, - webpage, 'description', default=None) + video_description = self._parse_json(self._html_search_regex( + r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id, + webpage, 'description', default='{}'), video_id).get('description') if video_description: video_description = lowercase_escape(video_description.replace(r'\r\n', '\n')) video_upload_date = self._html_search_regex( From 63a29b6118d147404b5fff63d82f098c4bfa3ffc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Feb 2017 23:45:01 +0700 Subject: [PATCH 0340/1696] 
[ChangeLog] Actualize --- ChangeLog | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ChangeLog b/ChangeLog index e57e7fece..14db5ac13 100644 --- a/ChangeLog +++ b/ChangeLog @@ -2,6 +2,15 @@ version <unreleased> Extractors * [crunchyroll] Fix descriptions with double quotes (#12124) +* [dailymotion] Make comment count optional (#12209) ++ [vidzi] Add support for vidzi.cc (#12213) ++ [24video] Add support for 24video.tube (#12217) ++ [crackle] Use geo bypass mechanism ++ [viewster] Use geo verification headers ++ [tfo] Improve geo restriction detection and use geo bypass mechanism ++ [telequebec] Use geo bypass mechanism ++ [limelight] Extract PlaylistService errors and improve geo restriction + detection version 2017.02.21 From 345b24538b24772c6c5917439e62c510437fce04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Feb 2017 23:50:42 +0700 Subject: [PATCH 0341/1696] release 2017.02.22 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 8b6d14fa2..923f28276 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.21*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.21** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.22** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.21 +[debug] youtube-dl version 2017.02.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 14db5ac13..cff065171 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.22 Extractors * [crunchyroll] Fix descriptions with double quotes (#12124) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a85aebaa3..fce1b8558 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.21' +__version__ = '2017.02.22' From bc61c80c143d8faed3e264c2487204924acd1eb6 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Feb 2017 11:47:06 +0100 Subject: [PATCH 0342/1696] [leeco] raise GeoRestrictedError and use geo bypass mechanism --- youtube_dl/extractor/leeco.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index 4321f90c8..9eda956d2 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ -30,7 +30,7 @@ from ..utils import ( class LeIE(InfoExtractor): IE_DESC = '乐视网' _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P<id>\d+)\.html' - + _GEO_COUNTRIES = ['CN'] _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html' _TESTS = [{ @@ -126,10 +126,9 @@ class LeIE(InfoExtractor): if playstatus['status'] == 0: flag = playstatus['flag'] if flag == 1: - msg = 'Country %s auth error' % playstatus['country'] + self.raise_geo_restricted() else: - msg = 'Generic error. flag = %d' % flag - raise ExtractorError(msg, expected=True) + raise ExtractorError('Generic error. flag = %d' % flag, expected=True) def _real_extract(self, url): media_id = self._match_id(url) From c59f7036101b5349b3b02a8bd700eff507012e3f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Feb 2017 11:49:35 +0100 Subject: [PATCH 0343/1696] [sohu] raise GeoRestrictedError --- youtube_dl/extractor/sohu.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index 30760ca06..7da12cef8 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -108,12 +108,11 @@ class SohuIE(InfoExtractor): if vid_data['play'] != 1: if vid_data.get('status') == 12: raise ExtractorError( - 'Sohu said: There\'s something wrong in the video.', + '%s said: There\'s something wrong in the video.' % self.IE_NAME, expected=True) else: - raise ExtractorError( - 'Sohu said: The video is only licensed to users in Mainland China.', - expected=True) + self.raise_geo_restricted( + '%s said: The video is only licensed to users in Mainland China.' 
% self.IE_NAME) formats_json = {} for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'): From 30eaa3a7023a3c03c62eb481f6415cb0599e0da5 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Feb 2017 11:50:04 +0100 Subject: [PATCH 0344/1696] [mgtv] fix extraction --- youtube_dl/extractor/mgtv.py | 50 +++++++++++++++++------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index 659ede8c2..d53d96aae 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -2,16 +2,17 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import int_or_none class MGTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html' + _VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html' IE_DESC = '芒果TV' _TESTS = [{ 'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html', - 'md5': '1bdadcf760a0b90946ca68ee9a2db41a', + 'md5': 'b1ffc0fc163152acf6beaa81832c9ee7', 'info_dict': { 'id': '3116640', 'ext': 'mp4', @@ -21,48 +22,45 @@ class MGTVIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', }, }, { - # no tbr extracted from stream_url - 'url': 'http://www.mgtv.com/v/1/1/f/3324755.html', + 'url': 'http://www.mgtv.com/b/301817/3826653.html', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) api_data = self._download_json( - 'http://v.api.mgtv.com/player/video', video_id, + 'http://pcweb.api.mgtv.com/player/video', video_id, query={'video_id': video_id}, headers=self.geo_verification_headers())['data'] info = api_data['info'] + title = info['title'].strip() + stream_domain = api_data['stream_domain'][0] formats = [] for idx, stream in enumerate(api_data['stream']): - stream_url = stream.get('url') - if not stream_url: + stream_path = stream.get('url') + if not stream_path: + continue + format_data = self._download_json( + stream_domain + stream_path, video_id, + note='Download video info for format #%d' % idx) + format_url = format_data.get('info') + if not format_url: continue tbr = int_or_none(self._search_regex( - r'(\d+)\.mp4', stream_url, 'tbr', default=None)) - - def extract_format(stream_url, format_id, idx, query={}): - format_info = self._download_json( - stream_url, video_id, - note='Download video info for format %s' % (format_id or '#%d' % idx), - query=query) - return { - 'format_id': format_id, - 'url': format_info['info'], - 'ext': 'mp4', - 'tbr': tbr, - } - - formats.append(extract_format( - stream_url, 'hls-%d' % tbr if tbr else None, idx * 2)) - formats.append(extract_format(stream_url.replace( - '/playlist.m3u8', ''), 'http-%d' % tbr if tbr else None, idx * 2 + 1, {'pno': 1031})) + r'_(\d+)_mp4/', format_url, 'tbr', default=None)) + formats.append({ + 'format_id': compat_str(tbr or idx), + 'url': format_url, + 'ext': 'mp4', + 'tbr': tbr, + 'protocol': 'm3u8_native', + }) self._sort_formats(formats) return { 'id': video_id, - 'title': info['title'].strip(), + 'title': title, 'formats': formats, 'description': info.get('desc'), 'duration': int_or_none(info.get('duration')), From 9e03aa75c779b79da79353ef1ecc4520ad06d6d0 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Feb 2017 11:54:43 +0100 Subject: [PATCH 0345/1696] [crunchyroll] extract playlist entries ids --- youtube_dl/extractor/crunchyroll.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) 
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index d2b87442d..a1fc6a756 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -534,11 +534,11 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>', webpage, 'title') episode_paths = re.findall( - r'(?s)<li id="showview_videos_media_[0-9]+"[^>]+>.*?<a href="([^"]+)"', + r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"', webpage) entries = [ - self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll') - for ep in episode_paths + self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll', ep_id) + for ep_id, ep in episode_paths ] entries.reverse() From ada77fa544e185a8cd7c3e5d6374e0b6995557a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Feb 2017 18:02:04 +0700 Subject: [PATCH 0346/1696] [instagram] Add support for multi video posts (closes #12226) --- youtube_dl/extractor/instagram.py | 54 +++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 98f408c18..c1921cbcf 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( get_element_by_attribute, int_or_none, @@ -50,6 +51,33 @@ class InstagramIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + # multi video post + 'url': 'https://www.instagram.com/p/BQ0eAlwhDrw/', + 'playlist': [{ + 'info_dict': { + 'id': 'BQ0dSaohpPW', + 'ext': 'mp4', + 'title': 'Video 1', + }, + }, { + 'info_dict': { + 'id': 'BQ0dTpOhuHT', + 'ext': 'mp4', + 'title': 'Video 2', + }, + }, { + 'info_dict': { + 'id': 'BQ0dT7RBFeF', + 'ext': 'mp4', + 'title': 'Video 3', + }, + }], + 'info_dict': { + 'id': 'BQ0eAlwhDrw', + 'title': 'Post by instagram', + 'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957', + }, }, { 'url': 'https://instagram.com/p/-Cmh1cukG2/', 'only_matching': True, @@ -113,6 +141,32 @@ class InstagramIE(InfoExtractor): 'timestamp': int_or_none(comment.get('created_at')), } for comment in media.get( 'comments', {}).get('nodes', []) if comment.get('text')] + if not video_url: + edges = try_get( + media, lambda x: x['edge_sidecar_to_children']['edges'], + list) or [] + if edges: + entries = [] + for edge_num, edge in enumerate(edges, start=1): + node = try_get(edge, lambda x: x['node'], dict) + if not node: + continue + node_video_url = try_get(node, lambda x: x['video_url'], compat_str) + if not node_video_url: + continue + entries.append({ + 'id': node.get('shortcode') or node['id'], + 'title': 'Video %d' % edge_num, + 'url': node_video_url, + 'thumbnail': node.get('display_url'), + 'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])), + 'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])), + 'view_count': int_or_none(node.get('video_view_count')), + }) + return self.playlist_result( + entries, video_id, + 'Post by %s' % uploader_id if uploader_id else None, + description) if not video_url: video_url = self._og_search_video_url(webpage, secure=False) From d5fd9a3be305aa8fead8fb70aae64703afe49e43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Feb 2017 18:44:04 +0700 Subject: [PATCH 0347/1696] [skylinewebcams] Add 
extractor (closes #12221) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/skylinewebcams.py | 42 ++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 youtube_dl/extractor/skylinewebcams.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 55b4782d3..83a170fa7 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -852,6 +852,7 @@ from .shared import ( from .showroomlive import ShowRoomLiveIE from .sina import SinaIE from .sixplay import SixPlayIE +from .skylinewebcams import SkylineWebcamsIE from .skynewsarabia import ( SkyNewsArabiaIE, SkyNewsArabiaArticleIE, diff --git a/youtube_dl/extractor/skylinewebcams.py b/youtube_dl/extractor/skylinewebcams.py new file mode 100644 index 000000000..5b4aaac6f --- /dev/null +++ b/youtube_dl/extractor/skylinewebcams.py @@ -0,0 +1,42 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class SkylineWebcamsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?skylinewebcams\.com/[^/]+/webcam/(?:[^/]+/)+(?P<id>[^/]+)\.html' + _TEST = { + 'url': 'https://www.skylinewebcams.com/it/webcam/italia/lazio/roma/scalinata-piazza-di-spagna-barcaccia.html', + 'info_dict': { + 'id': 'scalinata-piazza-di-spagna-barcaccia', + 'ext': 'mp4', + 'title': 're:^Live Webcam Scalinata di Piazza di Spagna - La Barcaccia [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'Roma, veduta sulla Scalinata di Piazza di Spagna e sulla Barcaccia', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + stream_url = self._search_regex( + r'url\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage, + 'stream url', group='url') + + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + + return { + 'id': video_id, + 'url': stream_url, + 'ext': 'mp4', + 'title': self._live_title(title), + 'description': description, + 'is_live': True, + } From 0f3d41b44d84869e7f4e809692ce71567b3f7130 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Feb 2017 19:48:54 +0700 Subject: [PATCH 0348/1696] [devscripts/run_tests] Exclude youtube lists tests from core build --- devscripts/run_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh index c60807215..6ba26720d 100755 --- a/devscripts/run_tests.sh +++ b/devscripts/run_tests.sh @@ -1,6 +1,6 @@ #!/bin/bash -DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter" +DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter|youtube_lists" test_set="" multiprocess_args="" From 28572a1a0b27ba3ccedac5d8d093f925dfb7485f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 5 Mar 2016 23:07:25 +0600 Subject: [PATCH 0349/1696] [compat] Add compat_numeric_types --- youtube_dl/compat.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 718902019..b257e2e81 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2760,6 +2760,10 @@ else: compat_kwargs = lambda kwargs: kwargs +compat_numeric_types = ((int, float, long, complex) if sys.version_info[0] < 3 + else (int, float, complex)) + + if sys.version_info < (2, 7): def 
compat_socket_create_connection(address, timeout, source_address=None): host, port = address @@ -2895,6 +2899,7 @@ __all__ = [ 'compat_input', 'compat_itertools_count', 'compat_kwargs', + 'compat_numeric_types', 'compat_ord', 'compat_os_name', 'compat_parse_qs', From d0d9ade4860fd44a07f5513d13b66233fdca0e89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 6 Mar 2016 03:52:42 +0600 Subject: [PATCH 0350/1696] [YoutubeDL] Add support for string formatting operations in output template --- test/test_YoutubeDL.py | 14 ++++++++++++++ youtube_dl/YoutubeDL.py | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 2cfcf743a..8491a88bd 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -526,6 +526,7 @@ class TestYoutubeDL(unittest.TestCase): 'id': '1234', 'ext': 'mp4', 'width': None, + 'height': 1080, } def fname(templ): @@ -535,6 +536,19 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4') # Replace missing fields with 'NA' self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4') + self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4') + self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4') + self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4') + self.assertEqual(fname('%(height)06d.%(ext)s'), '001080.mp4') + self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4') + self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4') + self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4') + self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4') + self.assertEqual(fname('%(height) 0 6d.%(ext)s'), ' 01080.mp4') + self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4') + self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4') + self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s') + self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4') def test_format_note(self): ydl = YoutubeDL() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 68000dea2..bdaf06e62 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -33,6 +33,7 @@ from .compat import ( compat_get_terminal_size, compat_http_client, compat_kwargs, + compat_numeric_types, compat_os_name, compat_str, compat_tokenize_tokenize, @@ -609,12 +610,45 @@ class YoutubeDL(object): compat_str(v), restricted=self.params.get('restrictfilenames'), is_id=(k == 'id')) - template_dict = dict((k, sanitize(k, v)) + template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v)) for k, v in template_dict.items() if v is not None and not isinstance(v, (list, tuple, dict))) template_dict = collections.defaultdict(lambda: 'NA', template_dict) outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) + + NUMERIC_FIELDS = set(( + 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx', + 'upload_year', 'upload_month', 'upload_day', + 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', + 'average_rating', 'comment_count', 'age_limit', + 'start_time', 'end_time', + 'chapter_number', 'season_number', 'episode_number', + )) + + # Missing numeric fields used together with integer presentation types + # in format specification will break the argument substitution since + # string 'NA' is returned for missing fields. 
We will patch output + # template for missing fields to meet string presentation type. + for numeric_field in NUMERIC_FIELDS: + if numeric_field not in template_dict: + # As of [1] format syntax is: + # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type + # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting + FORMAT_RE = r'''(?x) + (?<!%) + % + \({0}\) # mapping key + (?:[#0\-+ ]+)? # conversion flags (optional) + (?:\d+)? # minimum field width (optional) + (?:\.\d+)? # precision (optional) + [hlL]? # length modifier (optional) + [diouxXeEfFgGcrs%] # conversion type + ''' + outtmpl = re.sub( + FORMAT_RE.format(numeric_field), + r'%({0})s'.format(numeric_field), outtmpl) + tmpl = compat_expanduser(outtmpl) filename = tmpl % template_dict # Temporary fix for #4787 From 89db639dfe02b291bbf901973ca00d6e60fc1dce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Feb 2017 22:01:09 +0700 Subject: [PATCH 0351/1696] [YoutubeDL] Rewrite outtmpl for playlist_index and autonumber for backward compatibility --- youtube_dl/YoutubeDL.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index bdaf06e62..fc5e67828 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -594,10 +594,7 @@ class YoutubeDL(object): autonumber_size = self.params.get('autonumber_size') if autonumber_size is None: autonumber_size = 5 - autonumber_templ = '%0' + str(autonumber_size) + 'd' - template_dict['autonumber'] = autonumber_templ % (self.params.get('autonumber_start', 1) - 1 + self._num_downloads) - if template_dict.get('playlist_index') is not None: - template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index']) + template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads if template_dict.get('resolution') is None: if template_dict.get('width') and template_dict.get('height'): template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height']) @@ -617,6 +614,20 @@ class YoutubeDL(object): outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) + # For fields playlist_index and autonumber convert all occurrences + # of %(field)s to %(field)0Nd for backward compatibility + field_size_compat_map = { + 'playlist_index': len(str(template_dict['n_entries'])), + 'autonumber': autonumber_size, + } + FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s' + mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl) + if mobj: + outtmpl = re.sub( + FIELD_SIZE_COMPAT_RE, + r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')], + outtmpl) + NUMERIC_FIELDS = set(( 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx', 'upload_year', 'upload_month', 'upload_day', @@ -624,6 +635,7 @@ class YoutubeDL(object): 'average_rating', 'comment_count', 'age_limit', 'start_time', 'end_time', 'chapter_number', 'season_number', 'episode_number', + 'playlist_index', )) # Missing numeric fields used together with integer presentation types From b3175982c31a61ff4184d666b0bdb6dd34213365 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Feb 2017 22:01:57 +0700 Subject: [PATCH 0352/1696] [YoutubeDL] Add more numeric fields for NA substitution in outtmpl --- youtube_dl/YoutubeDL.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/YoutubeDL.py 
b/youtube_dl/YoutubeDL.py index fc5e67828..56a8691eb 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -635,6 +635,7 @@ class YoutubeDL(object): 'average_rating', 'comment_count', 'age_limit', 'start_time', 'end_time', 'chapter_number', 'season_number', 'episode_number', + 'track_number', 'disc_number', 'release_year', 'playlist_index', )) From fafc2bf5a92b8397148e47e0c9b46fb4d9212075 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Feb 2017 22:11:16 +0700 Subject: [PATCH 0353/1696] [options] Deprecate --autonumber-size --- youtube_dl/options.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 2c880d06a..5a11dddf9 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -680,7 +680,9 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '--autonumber-size', dest='autonumber_size', metavar='NUMBER', default=5, type=int, - help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given (default is %default)') + help='[deprecated; use output template with %(autonumber)0Nd, where N in the number of digits] ' + 'Specify the number of digits in %(autonumber)s when it is present ' + 'in output filename template or --auto-number option is given (default is %default)') filesystem.add_option( '--autonumber-start', dest='autonumber_start', metavar='NUMBER', default=1, type=int, From 050f143c1286ddafcb6966a0f679c5bbaceecca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Feb 2017 23:00:13 +0700 Subject: [PATCH 0354/1696] [README.md] Clarify sequence types in output template and document numeric string formatting operations --- README.md | 140 +++++++++++++++++++++++++++--------------------------- 1 file changed, 71 insertions(+), 69 deletions(-) diff --git a/README.md b/README.md index 1eccfd287..b4364ef61 100644 --- a/README.md +++ b/README.md @@ -486,87 +486,89 @@ The `-o` option allows users to indicate a template for the output file names. **tl;dr:** [navigate me to examples](#output-template-examples). -The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. 
Allowed names are: - - - `id`: Video identifier - - `title`: Video title - - `url`: Video URL - - `ext`: Video filename extension - - `alt_title`: A secondary title of the video - - `display_id`: An alternative identifier for the video - - `uploader`: Full name of the video uploader - - `license`: License name the video is licensed under - - `creator`: The creator of the video - - `release_date`: The date (YYYYMMDD) when the video was released - - `timestamp`: UNIX timestamp of the moment the video became available - - `upload_date`: Video upload date (YYYYMMDD) - - `uploader_id`: Nickname or id of the video uploader - - `location`: Physical location where the video was filmed - - `duration`: Length of the video in seconds - - `view_count`: How many users have watched the video on the platform - - `like_count`: Number of positive ratings of the video - - `dislike_count`: Number of negative ratings of the video - - `repost_count`: Number of reposts of the video - - `average_rating`: Average rating give by users, the scale used depends on the webpage - - `comment_count`: Number of comments on the video - - `age_limit`: Age restriction for the video (years) - - `format`: A human-readable description of the format - - `format_id`: Format code specified by `--format` - - `format_note`: Additional info about the format - - `width`: Width of the video - - `height`: Height of the video - - `resolution`: Textual description of width and height - - `tbr`: Average bitrate of audio and video in KBit/s - - `abr`: Average audio bitrate in KBit/s - - `acodec`: Name of the audio codec in use - - `asr`: Audio sampling rate in Hertz - - `vbr`: Average video bitrate in KBit/s - - `fps`: Frame rate - - `vcodec`: Name of the video codec in use - - `container`: Name of the container format - - `filesize`: The number of bytes, if known in advance - - `filesize_approx`: An estimate for the number of bytes - - `protocol`: The protocol that will be used for the actual download - - `extractor`: Name of the extractor - - `extractor_key`: Key name of the extractor - - `epoch`: Unix epoch when creating the file - - `autonumber`: Five-digit number that will be increased with each download, starting at zero - - `playlist`: Name or id of the playlist that contains the video - - `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist - - `playlist_id`: Playlist identifier - - `playlist_title`: Playlist title +The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. 
Allowed names along with sequence type are: + + - `id` (string): Video identifier + - `title` (string): Video title + - `url` (string): Video URL + - `ext` (string): Video filename extension + - `alt_title` (string): A secondary title of the video + - `display_id` (string): An alternative identifier for the video + - `uploader` (string): Full name of the video uploader + - `license` (string): License name the video is licensed under + - `creator` (string): The creator of the video + - `release_date` (string): The date (YYYYMMDD) when the video was released + - `timestamp` (numeric): UNIX timestamp of the moment the video became available + - `upload_date` (string): Video upload date (YYYYMMDD) + - `uploader_id` (string): Nickname or id of the video uploader + - `location` (string): Physical location where the video was filmed + - `duration` (numeric): Length of the video in seconds + - `view_count` (numeric): How many users have watched the video on the platform + - `like_count` (numeric): Number of positive ratings of the video + - `dislike_count` (numeric): Number of negative ratings of the video + - `repost_count` (numeric): Number of reposts of the video + - `average_rating` (numeric): Average rating give by users, the scale used depends on the webpage + - `comment_count` (numeric): Number of comments on the video + - `age_limit` (numeric): Age restriction for the video (years) + - `format` (string): A human-readable description of the format + - `format_id` (string): Format code specified by `--format` + - `format_note` (string): Additional info about the format + - `width` (numeric): Width of the video + - `height` (numeric): Height of the video + - `resolution` (string): Textual description of width and height + - `tbr` (numeric): Average bitrate of audio and video in KBit/s + - `abr` (numeric): Average audio bitrate in KBit/s + - `acodec` (string): Name of the audio codec in use + - `asr` (numeric): Audio sampling rate in Hertz + - `vbr` (numeric): Average video bitrate in KBit/s + - `fps` (numeric): Frame rate + - `vcodec` (string): Name of the video codec in use + - `container` (string): Name of the container format + - `filesize` (numeric): The number of bytes, if known in advance + - `filesize_approx` (numeric): An estimate for the number of bytes + - `protocol` (string): The protocol that will be used for the actual download + - `extractor` (string): Name of the extractor + - `extractor_key` (string): Key name of the extractor + - `epoch` (numeric): Unix epoch when creating the file + - `autonumber` (numeric): Five-digit number that will be increased with each download, starting at zero + - `playlist` (string): Name or id of the playlist that contains the video + - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist + - `playlist_id` (string): Playlist identifier + - `playlist_title` (string): Playlist title Available for the video that belongs to some logical chapter or section: - - `chapter`: Name or title of the chapter the video belongs to - - `chapter_number`: Number of the chapter the video belongs to - - `chapter_id`: Id of the chapter the video belongs to + - `chapter` (string): Name or title of the chapter the video belongs to + - `chapter_number` (numeric): Number of the chapter the video belongs to + - `chapter_id` (string): Id of the chapter the video belongs to Available for the video that is an episode of some series or programme: - - `series`: Title of the series or programme the 
video episode belongs to - - `season`: Title of the season the video episode belongs to - - `season_number`: Number of the season the video episode belongs to - - `season_id`: Id of the season the video episode belongs to - - `episode`: Title of the video episode - - `episode_number`: Number of the video episode within a season - - `episode_id`: Id of the video episode + - `series` (string): Title of the series or programme the video episode belongs to + - `season` (string): Title of the season the video episode belongs to + - `season_number` (numeric): Number of the season the video episode belongs to + - `season_id` (string): Id of the season the video episode belongs to + - `episode` (string): Title of the video episode + - `episode_number` (numeric): Number of the video episode within a season + - `episode_id` (string): Id of the video episode Available for the media that is a track or a part of a music album: - - `track`: Title of the track - - `track_number`: Number of the track within an album or a disc - - `track_id`: Id of the track - - `artist`: Artist(s) of the track - - `genre`: Genre(s) of the track - - `album`: Title of the album the track belongs to - - `album_type`: Type of the album - - `album_artist`: List of all artists appeared on the album - - `disc_number`: Number of the disc or other physical medium the track belongs to - - `release_year`: Year (YYYY) when the album was released + - `track` (string): Title of the track + - `track_number` (numeric): Number of the track within an album or a disc + - `track_id` (string): Id of the track + - `artist` (string): Artist(s) of the track + - `genre` (string): Genre(s) of the track + - `album` (string): Title of the album the track belongs to + - `album_type` (string): Type of the album + - `album_artist` (string): List of all artists appeared on the album + - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to + - `release_year` (numeric): Year (YYYY) when the album was released Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`. For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory. +For numeric sequences you can use numeric related formatting, for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`. + Output templates can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you. To use percent literals in an output template use `%%`. To output to stdout use `-o -`. 
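Since the output-template changes in this and the preceding patches boil down to plain Python %-formatting over the template dict, a short self-contained illustration may help; the field values below are invented stand-ins for what YoutubeDL collects from the info dict:

    template_dict = {
        'title': 'youtube-dl test video', 'id': 'BaW_jenozKc', 'ext': 'mp4',
        'height': 1080, 'view_count': 42, 'playlist_index': 3,
    }

    # Plain string fields behave as before:
    print('%(title)s-%(id)s.%(ext)s' % template_dict)
    # -> youtube-dl test video-BaW_jenozKc.mp4

    # Numeric fields can now carry printf-style width and zero-padding:
    print('%(height)06d_%(view_count)05d.%(ext)s' % template_dict)
    # -> 001080_00042.mp4
    print('%(playlist_index)03d - %(title)s.%(ext)s' % template_dict)
    # -> 003 - youtube-dl test video.mp4

    # A missing numeric field such as %(width)06d would crash the %d conversion on
    # the 'NA' placeholder, which is why YoutubeDL rewrites it to %(width)s first.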
From 8e1409fd805b3b5e3731da66a2101494643a06ea Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Feb 2017 18:42:06 +0100 Subject: [PATCH 0355/1696] [go] sign all uplynk urls and use geo bypass only for free videos(closes #12087)(closes #12210) --- youtube_dl/extractor/go.py | 81 ++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index b205bfc7c..21ed846b2 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -37,7 +37,6 @@ class GoIE(AdobePassIE): } } _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys()) - _GEO_COUNTRIES = ['US'] _TESTS = [{ 'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', 'info_dict': { @@ -79,44 +78,60 @@ class GoIE(AdobePassIE): ext = determine_ext(asset_url) if ext == 'm3u8': video_type = video_data.get('type') - if video_type == 'lf': - data = { - 'video_id': video_data['id'], - 'video_type': video_type, - 'brand': brand, - 'device': '001', - } - if video_data.get('accesslevel') == '1': - requestor_id = site_info['requestor_id'] - resource = self._get_mvpd_resource( - requestor_id, title, video_id, None) - auth = self._extract_mvpd_auth( - url, video_id, requestor_id, resource) - data.update({ - 'token': auth, - 'token_type': 'ap', - 'adobe_requestor_id': requestor_id, - }) - entitlement = self._download_json( - 'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json', - video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers()) - errors = entitlement.get('errors', {}).get('errors', []) - if errors: - for error in errors: - if error.get('code') == 1002: - self.raise_geo_restricted( - error['message'], countries=self._GEO_COUNTRIES) - error_message = ', '.join([error['message'] for error in errors]) - raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) - asset_url += '?' + entitlement['uplynkData']['sessionKey'] + data = { + 'video_id': video_data['id'], + 'video_type': video_type, + 'brand': brand, + 'device': '001', + } + if video_data.get('accesslevel') == '1': + requestor_id = site_info['requestor_id'] + resource = self._get_mvpd_resource( + requestor_id, title, video_id, None) + auth = self._extract_mvpd_auth( + url, video_id, requestor_id, resource) + data.update({ + 'token': auth, + 'token_type': 'ap', + 'adobe_requestor_id': requestor_id, + }) + else: + self._initialize_geo_bypass(['US']) + entitlement = self._download_json( + 'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json', + video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers()) + errors = entitlement.get('errors', {}).get('errors', []) + if errors: + for error in errors: + if error.get('code') == 1002: + self.raise_geo_restricted( + error['message'], countries=['US']) + error_message = ', '.join([error['message'] for error in errors]) + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) + asset_url += '?' 
+ entitlement['uplynkData']['sessionKey'] formats.extend(self._extract_m3u8_formats( asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)) else: - formats.append({ + f = { 'format_id': format_id, 'url': asset_url, 'ext': ext, - }) + } + if re.search(r'(?:/mp4/source/|_source\.mp4)', asset_url): + f.update({ + 'format_id': ('%s-' % format_id if format_id else '') + 'SOURCE', + 'preference': 1, + }) + else: + mobj = re.search(r'/(\d+)x(\d+)/', asset_url) + if mobj: + height = int(mobj.group(2)) + f.update({ + 'format_id': ('%s-' % format_id if format_id else '') + '%dP' % height, + 'width': int(mobj.group(1)), + 'height': height, + }) + formats.append(f) self._sort_formats(formats) subtitles = {} From 19f3821821ada01fbf9b466402bc1d0366b3edb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 02:09:13 +0700 Subject: [PATCH 0356/1696] [devscripts/make_lazy_extractors] Fix making lazy extractors on python 3 under Windows --- devscripts/make_lazy_extractors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 19114d30d..0a1762dbc 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals, print_function from inspect import getsource +import io import os from os.path import dirname as dirn import sys @@ -95,5 +96,5 @@ module_contents.append( module_src = '\n'.join(module_contents) + '\n' -with open(lazy_extractors_filename, 'wt') as f: +with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f: f.write(module_src) From 0f57447de790fb2434cdd80d819876859ac4fcc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 04:56:58 +0700 Subject: [PATCH 0357/1696] [postprocessor/ffmpeg] Add mising space (closes #12232) --- youtube_dl/postprocessor/ffmpeg.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 1881f4849..96ddb3b36 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -536,8 +536,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): ext = sub['ext'] if ext == new_ext: self._downloader.to_screen( - '[ffmpeg] Subtitle file for %s is already in the requested' - 'format' % new_ext) + '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext) continue old_file = subtitles_filename(filename, lang, ext) sub_filenames.append(old_file) From f2980fddeb587f113afe15cc3ecf4bfc3911ca67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 05:01:31 +0700 Subject: [PATCH 0358/1696] [lynda:course] Add webpage extraction fallback (closes #12238) --- youtube_dl/extractor/lynda.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index da94eab56..d2f75296a 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -260,9 +260,24 @@ class LyndaCourseIE(LyndaBaseIE): course_path = mobj.group('coursepath') course_id = mobj.group('courseid') + item_template = 'https://www.lynda.com/%s/%%s-4.html' % course_path + course = self._download_json( 'https://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, - course_id, 'Downloading course JSON') + course_id, 'Downloading course 
JSON', fatal=False) + + if not course: + webpage = self._download_webpage(url, course_id) + entries = [ + self.url_result( + item_template % video_id, ie=LyndaIE.ie_key(), + video_id=video_id) + for video_id in re.findall( + r'data-video-id=["\'](\d+)', webpage)] + return self.playlist_result( + entries, course_id, + self._og_search_title(webpage, fatal=False), + self._og_search_description(webpage)) if course.get('Status') == 'NotFound': raise ExtractorError( @@ -283,7 +298,7 @@ class LyndaCourseIE(LyndaBaseIE): if video_id: entries.append({ '_type': 'url_transparent', - 'url': 'https://www.lynda.com/%s/%s-4.html' % (course_path, video_id), + 'url': item_template % video_id, 'ie_key': LyndaIE.ie_key(), 'chapter': chapter.get('Title'), 'chapter_number': int_or_none(chapter.get('ChapterIndex')), From be5df5ee311b3ad027f5d59fe077969babd0aa53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 06:04:27 +0700 Subject: [PATCH 0359/1696] Suppress help for all deprecated options and print warning when used --- youtube_dl/YoutubeDL.py | 14 ++++++++++++-- youtube_dl/__init__.py | 3 +++ youtube_dl/options.py | 12 +++++------- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 56a8691eb..f7254560c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -328,11 +328,21 @@ class YoutubeDL(object): self.params.update(params) self.cache = Cache(self) - if self.params.get('cn_verification_proxy') is not None: - self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.') + def check_deprecated(param, option, suggestion): + if self.params.get(param) is not None: + self.report_warning( + '%s is deprecated. Use %s instead.' 
% (option, suggestion)) + return True + return False + + if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'): if self.params.get('geo_verification_proxy') is None: self.params['geo_verification_proxy'] = self.params['cn_verification_proxy'] + check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits') + check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"') + check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"') + if params.get('bidi_workaround', False): try: import pty diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index f91d29a7b..0c401baa6 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -416,6 +416,9 @@ def _real_main(argv=None): 'config_location': opts.config_location, 'geo_bypass': opts.geo_bypass, 'geo_bypass_country': opts.geo_bypass_country, + # just for deprecation check + 'autonumber': opts.autonumber if opts.autonumber is True else None, + 'usetitle': opts.usetitle if opts.usetitle is True else None, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 5a11dddf9..8b51d3c6f 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -679,10 +679,8 @@ def parseOpts(overrideArguments=None): help=('Output filename template, see the "OUTPUT TEMPLATE" for all the info')) filesystem.add_option( '--autonumber-size', - dest='autonumber_size', metavar='NUMBER', default=5, type=int, - help='[deprecated; use output template with %(autonumber)0Nd, where N in the number of digits] ' - 'Specify the number of digits in %(autonumber)s when it is present ' - 'in output filename template or --auto-number option is given (default is %default)') + dest='autonumber_size', metavar='NUMBER', type=int, + help=optparse.SUPPRESS_HELP) filesystem.add_option( '--autonumber-start', dest='autonumber_start', metavar='NUMBER', default=1, type=int, @@ -694,15 +692,15 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '-A', '--auto-number', action='store_true', dest='autonumber', default=False, - help='[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000') + help=optparse.SUPPRESS_HELP) filesystem.add_option( '-t', '--title', action='store_true', dest='usetitle', default=False, - help='[deprecated] Use title in file name (default)') + help=optparse.SUPPRESS_HELP) filesystem.add_option( '-l', '--literal', default=False, action='store_true', dest='usetitle', - help='[deprecated] Alias of --title') + help=optparse.SUPPRESS_HELP) filesystem.add_option( '-w', '--no-overwrites', action='store_true', dest='nooverwrites', default=False, From f2f7961820155b6db4a70f083f011cd014c9b51c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 06:07:41 +0700 Subject: [PATCH 0360/1696] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index cff065171..6f63723fc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +version <unreleased> + +Core +* [options] Hide deprecated options from --help +* [options] Deprecate --autonumber-size ++ [YoutubeDL] Add support for string formatting operations in output template + (#5185, #5748, #6841, #9929, #9966 #9978, #12189) + +Extractors ++ [lynda:course] Add webpage extraction fallback (#12238) +* [go] Sign all uplynk URLs and use geo bypass only 
for free videos + (#12087, #12210) ++ [skylinewebcams] Add support for skylinewebcams.com (#12221) ++ [instagram] Add support for multi video posts (#12226) ++ [crunchyroll] Extract playlist entries ids +* [mgtv] Fix extraction ++ [sohu] Raise GeoRestrictedError ++ [leeco] Raise GeoRestrictedError and use geo bypass mechanism + + version 2017.02.22 Extractors From 6b097cff278c93de0665bf681729d75121a98eed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 06:09:15 +0700 Subject: [PATCH 0361/1696] release 2017.02.24 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 10 ---------- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 5 files changed, 6 insertions(+), 15 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 923f28276..743b796ac 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.22** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.24** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.22 +[debug] youtube-dl version 2017.02.24 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 6f63723fc..6034c9ea4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.24 Core * [options] Hide deprecated options from --help diff --git a/README.md b/README.md index b4364ef61..0fc5984dc 100644 --- a/README.md +++ b/README.md @@ -217,21 +217,11 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo --id Use only video ID in file name -o, --output TEMPLATE Output filename template, see the "OUTPUT TEMPLATE" for all the info - --autonumber-size NUMBER Specify the number of digits in - %(autonumber)s when it is present in output - filename template or --auto-number option - is given (default is 5) --autonumber-start NUMBER Specify the start value for %(autonumber)s (default is 1) --restrict-filenames Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames - -A, --auto-number [deprecated; use -o - "%(autonumber)s-%(title)s.%(ext)s" ] Number - downloaded files starting from 00000 - -t, --title [deprecated] Use title in file name - 
(default) - -l, --literal [deprecated] Alias of --title -w, --no-overwrites Do not overwrite files -c, --continue Force resume of partially downloaded files. By default, youtube-dl will resume diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1eb9c2cdd..f97397331 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -680,6 +680,7 @@ - **Shared**: shared.sx - **ShowRoomLive** - **Sina** + - **SkylineWebcams** - **skynewsarabia:article** - **skynewsarabia:video** - **SkySports** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index fce1b8558..129447e10 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.22' +__version__ = '2017.02.24' From 42dcdbe11cd738e6b196bc9c14b746a71d61de5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 10:52:41 +0700 Subject: [PATCH 0362/1696] [ivi] Raise GeoRestrictedError --- youtube_dl/extractor/ivi.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 3d3c15024..cb51cef2d 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -16,6 +16,8 @@ class IviIE(InfoExtractor): IE_DESC = 'ivi.ru' IE_NAME = 'ivi' _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)' + _GEO_BYPASS = False + _GEO_COUNTRIES = ['RU'] _TESTS = [ # Single movie @@ -91,7 +93,11 @@ class IviIE(InfoExtractor): if 'error' in video_json: error = video_json['error'] - if error['origin'] == 'NoRedisValidData': + origin = error['origin'] + if origin == 'NotAllowedForLocation': + self.raise_geo_restricted( + msg=error['message'], countries=self._GEO_COUNTRIES) + elif origin == 'NoRedisValidData': raise ExtractorError('Video %s does not exist' % video_id, expected=True) raise ExtractorError( 'Unable to download video %s: %s' % (video_id, error['message']), From 39e7277ed16c1647d636c766d57870121f5f2d68 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 24 Feb 2017 11:21:13 +0100 Subject: [PATCH 0363/1696] [openload] fix extraction(closes #10408) --- youtube_dl/extractor/openload.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 10896c442..fc7ff43a6 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -72,16 +72,21 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True) ol_id = self._search_regex( - '<span[^>]+id="[^"]+"[^>]*>([0-9]+)</span>', + '<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>', webpage, 'openload ID') - first_two_chars = int(float(ol_id[0:][:2])) + first_char = int(ol_id[0]) urlcode = [] - num = 2 + num = 1 while num < len(ol_id): - key = int(float(ol_id[num + 3:][:2])) - urlcode.append((key, compat_chr(int(float(ol_id[num:][:3])) - first_two_chars))) + i = ord(ol_id[num]) + key = 0 + if i <= 90: + key = i - 65 + elif i >= 97: + key = 25 + i - 97 + urlcode.append((key, compat_chr(int(ol_id[num + 2:num + 5]) // int(ol_id[num + 1]) - first_char))) num += 5 video_url = 'https://openload.co/stream/' + ''.join( From 68f17a9c2df07828d364421e59ede26981aa7756 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 24 Feb 2017 12:27:56 +0100 Subject: [PATCH 0364/1696] [tubitv] use geo bypass mechanism --- youtube_dl/extractor/tubitv.py 
| 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py index 3a37df2e8..c44018aec 100644 --- a/youtube_dl/extractor/tubitv.py +++ b/youtube_dl/extractor/tubitv.py @@ -16,6 +16,7 @@ class TubiTvIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tubitv\.com/video/(?P<id>[0-9]+)' _LOGIN_URL = 'http://tubitv.com/login' _NETRC_MACHINE = 'tubitv' + _GEO_COUNTRIES = ['US'] _TEST = { 'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday', 'md5': '43ac06be9326f41912dc64ccf7a80320', From 51ed496307a8a98134f25e8128c317663ccd4355 Mon Sep 17 00:00:00 2001 From: Thomas Christlieb <thomaschristlieb@hotmail.com> Date: Fri, 24 Feb 2017 15:08:45 +0100 Subject: [PATCH 0365/1696] [thescene] Fix extraction (closes #12235) --- youtube_dl/extractor/thescene.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/thescene.py b/youtube_dl/extractor/thescene.py index ce1326c03..6c16d66ed 100644 --- a/youtube_dl/extractor/thescene.py +++ b/youtube_dl/extractor/thescene.py @@ -32,7 +32,7 @@ class TheSceneIE(InfoExtractor): player = self._download_webpage(player_url, display_id) info = self._parse_json( self._search_regex( - r'(?m)var\s+video\s+=\s+({.+?});$', player, 'info json'), + r'(?m)video\s*:\s*({.+?}),$', player, 'info json'), display_id) qualities_order = qualities(('low', 'high')) @@ -40,7 +40,7 @@ class TheSceneIE(InfoExtractor): 'format_id': '{0}-{1}'.format(f['type'].split('/')[0], f['quality']), 'url': f['src'], 'quality': qualities_order(f['quality']), - } for f in info['sources'][0]] + } for f in info['sources']] self._sort_formats(formats) return { From 441d7a32e563b3985c58af5ab80d76ad943f0c07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 21:22:29 +0700 Subject: [PATCH 0366/1696] [thescene] Extract more metadata --- youtube_dl/extractor/thescene.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/thescene.py b/youtube_dl/extractor/thescene.py index 6c16d66ed..b8504f0eb 100644 --- a/youtube_dl/extractor/thescene.py +++ b/youtube_dl/extractor/thescene.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_urlparse -from ..utils import qualities +from ..utils import ( + int_or_none, + qualities, +) class TheSceneIE(InfoExtractor): @@ -16,6 +19,11 @@ class TheSceneIE(InfoExtractor): 'ext': 'mp4', 'title': 'Narciso Rodriguez: Spring 2013 Ready-to-Wear', 'display_id': 'narciso-rodriguez-spring-2013-ready-to-wear', + 'duration': 127, + 'series': 'Style.com Fashion Shows', + 'season': 'Ready To Wear Spring 2013', + 'tags': list, + 'categories': list, }, } @@ -35,6 +43,9 @@ class TheSceneIE(InfoExtractor): r'(?m)video\s*:\s*({.+?}),$', player, 'info json'), display_id) + video_id = info['id'] + title = info['title'] + qualities_order = qualities(('low', 'high')) formats = [{ 'format_id': '{0}-{1}'.format(f['type'].split('/')[0], f['quality']), @@ -44,9 +55,14 @@ class TheSceneIE(InfoExtractor): self._sort_formats(formats) return { - 'id': info['id'], + 'id': video_id, 'display_id': display_id, - 'title': info['title'], + 'title': title, 'formats': formats, 'thumbnail': info.get('poster_frame'), + 'duration': int_or_none(info.get('duration')), + 'series': info.get('series_title'), + 'season': info.get('season_title'), + 'tags': info.get('tags'), + 'categories': info.get('categories'), } From 
f3bc281239bafa971195eefd30773d152bfdb10c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 21:48:34 +0700 Subject: [PATCH 0367/1696] [noco] Swtich login URL to https (closes #12246) --- youtube_dl/extractor/noco.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 70ff2ab36..fc0624dd0 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -23,7 +23,7 @@ from ..utils import ( class NocoIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)' - _LOGIN_URL = 'http://noco.tv/do.php' + _LOGIN_URL = 'https://noco.tv/do.php' _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s' _SUB_LANG_TEMPLATE = '&sub_lang=%s' _NETRC_MACHINE = 'noco' From ad3033037ce6b6809c3d06f2074d36691ba10cbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 21:50:13 +0700 Subject: [PATCH 0368/1696] [noco] Modernize --- youtube_dl/extractor/noco.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index fc0624dd0..8b83e1f76 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -69,16 +69,17 @@ class NocoIE(InfoExtractor): if username is None: return - login_form = { - 'a': 'login', - 'cookie': '1', - 'username': username, - 'password': password, - } - request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form)) - request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') - - login = self._download_json(request, None, 'Logging in as %s' % username) + login = self._download_json( + self._LOGIN_URL, None, 'Logging in as %s' % username, + data=urlencode_postdata({ + 'a': 'login', + 'cookie': '1', + 'username': username, + 'password': password, + }), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + }) if 'erreur' in login: raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True) From 5d3a51e1b939ada71b3ec6ae7e004ccb6e0861c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 21:57:39 +0700 Subject: [PATCH 0369/1696] [ChangeLog] Actualize --- ChangeLog | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ChangeLog b/ChangeLog index 6034c9ea4..0225af42a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +version <unreleased> + +Extractors +* [noco] Modernize +* [noco] Switch login URL to https (#12246) ++ [thescene] Extract more metadata +* [thescene] Fix extraction (#12235) ++ [tubitv] Use geo bypass mechanism +* [openload] Fix extraction (#10408) ++ [ivi] Raise GeoRestrictedError + + version 2017.02.24 Core From 204efc850905a5b78c86f884b94210644784b9c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Feb 2017 21:59:39 +0700 Subject: [PATCH 0370/1696] release 2017.02.24.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 743b796ac..564108122 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.24*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.24** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.24.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.24.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.24 +[debug] youtube-dl version 2017.02.24.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 0225af42a..add8a6758 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.02.24.1 Extractors * [noco] Modernize diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 129447e10..fe7462eac 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.24' +__version__ = '2017.02.24.1' From 231bcd0b6b7737e6b0484ce8aaa4a14de442a1ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Feb 2017 02:51:53 +0700 Subject: [PATCH 0371/1696] [amcnetworks] Relax _VALID_URL (#12127) --- youtube_dl/extractor/amcnetworks.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py index b71d1a093..3a0ec6776 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -10,7 +10,7 @@ from ..utils import ( class AMCNetworksIE(ThePlatformIE): - _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?[^/]+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1', 'md5': '', @@ -44,6 +44,12 @@ class AMCNetworksIE(ThePlatformIE): }, { 'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version', 'only_matching': True, + }, { + 'url': 'http://www.wetv.com/shows/mama-june-from-not-to-hot/full-episode/season-01/thin-tervention', + 'only_matching': True, + }, { + 'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3', + 'only_matching': True, }] def _real_extract(self, url): From eeb0a9568442a4dbbf3478579abe2696fbe890e2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 25 Feb 2017 18:40:05 +0800 Subject: [PATCH 0372/1696] [extractor/common] 
Add 'preference' to _parse_html5_media_entries Some websites, like NJPWorld, put different qualities on different player pages. --- youtube_dl/extractor/common.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4252d6825..eb3c091aa 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2010,7 +2010,7 @@ class InfoExtractor(object): }) return formats - def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None): + def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None): def absolute_url(video_url): return compat_urlparse.urljoin(base_url, video_url) @@ -2032,7 +2032,8 @@ class InfoExtractor(object): is_plain_url = False formats = self._extract_m3u8_formats( full_url, video_id, ext='mp4', - entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id) + entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id, + preference=preference) elif ext == 'mpd': is_plain_url = False formats = self._extract_mpd_formats( From db182c63fb4a58974e425a56d235131fd9efc531 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 25 Feb 2017 18:44:39 +0800 Subject: [PATCH 0373/1696] [njpwworld] Add new extractor (closes #11561) --- ChangeLog | 6 +++ youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/njpwworld.py | 83 ++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+) create mode 100644 youtube_dl/extractor/njpwworld.py diff --git a/ChangeLog b/ChangeLog index add8a6758..e0e1f52d0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors ++ [njpwworld] Add new extractor (#11561) + + version 2017.02.24.1 Extractors diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 83a170fa7..703bc5d34 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -637,6 +637,7 @@ from .ninecninemedia import ( from .ninegag import NineGagIE from .ninenow import NineNowIE from .nintendo import NintendoIE +from .njpwworld import NJPWWorldIE from .nobelprize import NobelPrizeIE from .noco import NocoIE from .normalboots import NormalbootsIE diff --git a/youtube_dl/extractor/njpwworld.py b/youtube_dl/extractor/njpwworld.py new file mode 100644 index 000000000..f5e3f6815 --- /dev/null +++ b/youtube_dl/extractor/njpwworld.py @@ -0,0 +1,83 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( + get_element_by_class, + urlencode_postdata, +) + + +class NJPWWorldIE(InfoExtractor): + _VALID_URL = r'https?://njpwworld\.com/p/(?P<id>[a-z0-9_]+)' + IE_DESC = '新日本プロレスワールド' + _NETRC_MACHINE = 'njpwworld' + + _TEST = { + 'url': 'http://njpwworld.com/p/s_series_00155_1_9/', + 'info_dict': { + 'id': 's_series_00155_1_9', + 'ext': 'mp4', + 'title': '第9試合 ランディ・サベージ vs リック・スタイナー', + 'tags': list, + }, + 'params': { + 'skip_download': True, # AES-encrypted m3u8 + }, + 'skip': 'Requires login', + } + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + # No authentication to be performed + if not username: + return True + + webpage, urlh = self._download_webpage_handle( + 'https://njpwworld.com/auth/login', None, + note='Logging in', errnote='Unable to login', + 
data=urlencode_postdata({'login_id': username, 'pw': password})) + # /auth/login will return 302 for successful logins + if urlh.geturl() == 'https://njpwworld.com/auth/login': + self.report_warning('unable to login') + return False + + return True + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + formats = [] + for player_url, kind in re.findall(r'<a[^>]+href="(/player[^"]+)".+?<img[^>]+src="[^"]+qf_btn_([^".]+)', webpage): + player_url = compat_urlparse.urljoin(url, player_url) + + player_page = self._download_webpage( + player_url, video_id, note='Downloading player page') + + entries = self._parse_html5_media_entries( + player_url, player_page, video_id, m3u8_id='hls-%s' % kind, + m3u8_entry_protocol='m3u8_native', + preference=2 if 'hq' in kind else 1) + formats.extend(entries[0]['formats']) + + self._sort_formats(formats) + + post_content = get_element_by_class('post-content', webpage) + tags = re.findall( + r'<li[^>]+class="tag-[^"]+"><a[^>]*>([^<]+)</a></li>', post_content + ) if post_content else None + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'formats': formats, + 'tags': tags, + } From 831217291ac05ad75ef16fd6d9985e255489c1e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Feb 2017 19:44:31 +0700 Subject: [PATCH 0374/1696] [compat] Use try except for compat_numeric_types --- youtube_dl/compat.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index b257e2e81..0c119e417 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2760,8 +2760,10 @@ else: compat_kwargs = lambda kwargs: kwargs -compat_numeric_types = ((int, float, long, complex) if sys.version_info[0] < 3 - else (int, float, complex)) +try: + compat_numeric_types = (int, float, long, complex) +except NameError: # Python 3 + compat_numeric_types = (int, float, complex) if sys.version_info < (2, 7): From 922ab7840b01bd108887849063572cffa855cdc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Feb 2017 20:16:40 +0700 Subject: [PATCH 0375/1696] [etonline] Add extractor (closes #12236) --- youtube_dl/extractor/etonline.py | 39 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 40 insertions(+) create mode 100644 youtube_dl/extractor/etonline.py diff --git a/youtube_dl/extractor/etonline.py b/youtube_dl/extractor/etonline.py new file mode 100644 index 000000000..17d7cfec6 --- /dev/null +++ b/youtube_dl/extractor/etonline.py @@ -0,0 +1,39 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class ETOnlineIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?etonline\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'http://www.etonline.com/tv/211130_dove_cameron_liv_and_maddie_emotional_episode_series_finale/', + 'info_dict': { + 'id': '211130_dove_cameron_liv_and_maddie_emotional_episode_series_finale', + 'title': 'md5:a21ec7d3872ed98335cbd2a046f34ee6', + 'description': 'md5:8b94484063f463cca709617c79618ccd', + }, + 'playlist_count': 2, + }, { + 'url': 'http://www.etonline.com/media/video/here_are_the_stars_who_love_bringing_their_moms_as_dates_to_the_oscars-211359/', + 'only_matching': True, + }] + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911076001/default_default/index.html?videoId=ref:%s' + + def _real_extract(self, url): + 
playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result( + self.BRIGHTCOVE_URL_TEMPLATE % video_id, 'BrightcoveNew', video_id) + for video_id in re.findall( + r'site\.brightcove\s*\([^,]+,\s*["\'](title_\d+)', webpage)] + + return self.playlist_result( + entries, playlist_id, + self._og_search_title(webpage, fatal=False), + self._og_search_description(webpage)) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 703bc5d34..9f5aaf1d8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -288,6 +288,7 @@ from .espn import ( ESPNArticleIE, ) from .esri import EsriVideoIE +from .etonline import ETOnlineIE from .europa import EuropaIE from .everyonesmixtape import EveryonesMixtapeIE from .expotv import ExpoTVIE From 103f8c8d36170d5cb489420db0e8fe383b1f93dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Feb 2017 20:38:21 +0700 Subject: [PATCH 0376/1696] [xhamster] Capture and output videoClosed error (#12263) --- youtube_dl/extractor/xhamster.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 36a8c9840..7b6703714 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( dict_get, + ExtractorError, int_or_none, parse_duration, unified_strdate, @@ -57,6 +58,10 @@ class XHamsterIE(InfoExtractor): }, { 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html', 'only_matching': True, + }, { + # This video is visible for marcoalfa123456's friends only + 'url': 'https://it.xhamster.com/movies/7263980/la_mia_vicina.html', + 'only_matching': True, }] def _real_extract(self, url): @@ -78,6 +83,12 @@ class XHamsterIE(InfoExtractor): mrss_url = '%s://xhamster.com/movies/%s/%s.html' % (proto, video_id, seo) webpage = self._download_webpage(mrss_url, video_id) + error = self._html_search_regex( + r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>', + webpage, 'error', default=None) + if error: + raise ExtractorError(error, expected=True) + title = self._html_search_regex( [r'<h1[^>]*>([^<]+)</h1>', r'<meta[^>]+itemprop=".*?caption.*?"[^>]+content="(.+?)"', From d374d943f3d9eca4b2052cfc830905d299a0688a Mon Sep 17 00:00:00 2001 From: Pratyush Singh <singh.pratyush96@gmail.com> Date: Sat, 24 Dec 2016 21:05:41 +0530 Subject: [PATCH 0377/1696] [downloader/common] Limit displaying 2 digits after decimal point in sleep interval message --- youtube_dl/downloader/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 3dc144b4e..2c4470a95 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -347,7 +347,10 @@ class FileDownloader(object): if min_sleep_interval: max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) - self.to_screen('[download] Sleeping %s seconds...' % sleep_interval) + self.to_screen( + '[download] Sleeping %s seconds...' 
% ( + int(sleep_interval) if sleep_interval.is_integer() + else '%.2f' % sleep_interval)) time.sleep(sleep_interval) return self.real_download(filename, info_dict) From 5fc8d893612285ddba082f381006337573b36afa Mon Sep 17 00:00:00 2001 From: Ricardo Constantino <wiiaboo@gmail.com> Date: Sat, 18 Feb 2017 17:42:31 +0000 Subject: [PATCH 0378/1696] [freshlive] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/freshlive.py | 64 ++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 youtube_dl/extractor/freshlive.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9f5aaf1d8..860b8f422 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -339,6 +339,7 @@ from .francetv import ( ) from .freesound import FreesoundIE from .freespeech import FreespeechIE +from .freshlive import FreshliveIE from .funimation import FunimationIE from .funnyordie import FunnyOrDieIE from .fusion import FusionIE diff --git a/youtube_dl/extractor/freshlive.py b/youtube_dl/extractor/freshlive.py new file mode 100644 index 000000000..113f8f4fa --- /dev/null +++ b/youtube_dl/extractor/freshlive.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import ( + int_or_none, + parse_iso8601 +) + +class FreshliveIE(InfoExtractor): + _VALID_URL = r'https?://freshlive\.tv/(?P<streamer>[^/]+)/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://freshlive.tv/satotv/74712', + 'md5': '224f50d268b6b9f94e4198deccd55d6d', + 'info_dict': { + 'description': 'テスト', + 'duration': 1511, + 'id': '74712', + 'ext': 'mp4', + 'timestamp': 1483621764, + 'title': 'テスト', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20170105', + 'uploader': 'サトTV', + 'uploader_id': 'satotv', + 'view_count': int, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + options = self._parse_json( + self._search_regex( + r'window\.__CONTEXT__\s*=\s*({.+?});\s*</script>', + webpage, 'initial context'), + video_id) + + programs = options['context']['dispatcher']['stores']['ProgramStore']['programs'] + info = programs.get(video_id, {}) + + video_url = info.get('liveStreamUrl') or info.get('archiveStreamUrl') + if not video_url: + raise ExtractorError('%s not a valid broadcast ID' % video_id, expected=True) + + formats = self._extract_m3u8_formats( + video_url, video_id, ext='mp4', m3u8_id='hls') + + return { + 'id': video_id, + 'formats': formats, + 'title': info.get('title'), + 'description': info.get('description'), + 'duration': int_or_none(info.get('airTime')), + 'is_live': int_or_none(info.get('airTime')) == None, + 'thumbnail': info.get('thumbnailUrl'), + 'uploader': info.get('channel', {}).get('title'), + 'uploader_id': info.get('channel', {}).get('code'), + 'uploader_url': info.get('channel', {}).get('permalink'), + 'timestamp': parse_iso8601(info.get('startAt')), + 'view_count': int_or_none(info.get('viewCount')), + } \ No newline at end of file From e498758b9cfa983d05b10a185ecdef480a93cf30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Feb 2017 22:56:42 +0700 Subject: [PATCH 0379/1696] [freshlive] Fix issues and improve (closes #12175) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/freshlive.py | 64 ++++++++++++++++++++---------- 2 files changed, 43 insertions(+), 23 deletions(-) diff --git 
a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 860b8f422..58139ee4e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -339,7 +339,7 @@ from .francetv import ( ) from .freesound import FreesoundIE from .freespeech import FreespeechIE -from .freshlive import FreshliveIE +from .freshlive import FreshLiveIE from .funimation import FunimationIE from .funnyordie import FunnyOrDieIE from .fusion import FusionIE diff --git a/youtube_dl/extractor/freshlive.py b/youtube_dl/extractor/freshlive.py index 113f8f4fa..a90f9156c 100644 --- a/youtube_dl/extractor/freshlive.py +++ b/youtube_dl/extractor/freshlive.py @@ -2,34 +2,40 @@ from __future__ import unicode_literals from .common import InfoExtractor - +from ..compat import compat_str from ..utils import ( + ExtractorError, int_or_none, - parse_iso8601 + try_get, + unified_timestamp, ) -class FreshliveIE(InfoExtractor): - _VALID_URL = r'https?://freshlive\.tv/(?P<streamer>[^/]+)/(?P<id>[0-9]+)' + +class FreshLiveIE(InfoExtractor): + _VALID_URL = r'https?://freshlive\.tv/[^/]+/(?P<id>\d+)' _TEST = { 'url': 'https://freshlive.tv/satotv/74712', - 'md5': '224f50d268b6b9f94e4198deccd55d6d', + 'md5': '9f0cf5516979c4454ce982df3d97f352', 'info_dict': { - 'description': 'テスト', - 'duration': 1511, 'id': '74712', 'ext': 'mp4', - 'timestamp': 1483621764, 'title': 'テスト', + 'description': 'テスト', 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 1511, + 'timestamp': 1483619655, 'upload_date': '20170105', 'uploader': 'サトTV', 'uploader_id': 'satotv', 'view_count': int, + 'comment_count': int, + 'is_live': False, } } def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) options = self._parse_json( @@ -38,27 +44,41 @@ class FreshliveIE(InfoExtractor): webpage, 'initial context'), video_id) - programs = options['context']['dispatcher']['stores']['ProgramStore']['programs'] - info = programs.get(video_id, {}) + info = options['context']['dispatcher']['stores']['ProgramStore']['programs'][video_id] - video_url = info.get('liveStreamUrl') or info.get('archiveStreamUrl') - if not video_url: - raise ExtractorError('%s not a valid broadcast ID' % video_id, expected=True) + title = info['title'] + + if info.get('status') == 'upcoming': + raise ExtractorError('Stream %s is upcoming' % video_id, expected=True) + + stream_url = info.get('liveStreamUrl') or info['archiveStreamUrl'] + + is_live = info.get('liveStreamUrl') is not None formats = self._extract_m3u8_formats( - video_url, video_id, ext='mp4', m3u8_id='hls') + stream_url, video_id, ext='mp4', + entry_protocol='m3u8' if is_live else 'm3u8_native', + m3u8_id='hls') + + if is_live: + title = self._live_title(title) return { 'id': video_id, 'formats': formats, - 'title': info.get('title'), + 'title': title, 'description': info.get('description'), - 'duration': int_or_none(info.get('airTime')), - 'is_live': int_or_none(info.get('airTime')) == None, 'thumbnail': info.get('thumbnailUrl'), - 'uploader': info.get('channel', {}).get('title'), - 'uploader_id': info.get('channel', {}).get('code'), - 'uploader_url': info.get('channel', {}).get('permalink'), - 'timestamp': parse_iso8601(info.get('startAt')), + 'duration': int_or_none(info.get('airTime')), + 'timestamp': unified_timestamp(info.get('createdAt')), + 'uploader': try_get( + info, lambda x: x['channel']['title'], compat_str), + 'uploader_id': try_get( + info, lambda x: x['channel']['code'], compat_str), + 'uploader_url': try_get( + info, 
lambda x: x['channel']['permalink'], compat_str), 'view_count': int_or_none(info.get('viewCount')), - } \ No newline at end of file + 'comment_count': int_or_none(info.get('commentCount')), + 'tags': info.get('tags', []), + 'is_live': is_live, + } From 9d0c08a02c55136221621e8b8dda5860211af8dd Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 25 Feb 2017 01:40:12 +0800 Subject: [PATCH 0380/1696] [vevo] Fix videos with the new streams/streamsV3 format (closes #11719) --- ChangeLog | 2 ++ youtube_dl/extractor/vevo.py | 22 +++++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index e0e1f52d0..701afd57a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,8 @@ version <unreleased> Extractors +* [vevo] Fix extraction for videos with the new streams/streamsV3 format + (#11719) + [njpwworld] Add new extractor (#11561) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index c4e37f694..9aa38bc5a 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -17,12 +17,12 @@ from ..utils import ( class VevoBaseIE(InfoExtractor): - def _extract_json(self, webpage, video_id, item): + def _extract_json(self, webpage, video_id): return self._parse_json( self._search_regex( r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>', webpage, 'initial store'), - video_id)['default'][item] + video_id) class VevoIE(VevoBaseIE): @@ -139,6 +139,11 @@ class VevoIE(VevoBaseIE): # no genres available 'url': 'http://www.vevo.com/watch/INS171400764', 'only_matching': True, + }, { + # Another case available only via the webpage; using streams/streamsV3 formats + # Geo-restricted to Netherlands/Germany + 'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909', + 'only_matching': True, }] _VERSIONS = { 0: 'youtube', # only in AuthenticateVideo videoVersions @@ -193,7 +198,14 @@ class VevoIE(VevoBaseIE): # https://github.com/rg3/youtube-dl/issues/9366) if not video_versions: webpage = self._download_webpage(url, video_id) - video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0] + json_data = self._extract_json(webpage, video_id) + if 'streams' in json_data.get('default', {}): + video_versions = json_data['default']['streams'][video_id][0] + else: + video_versions = [ + value + for key, value in json_data['apollo']['data'].items() + if key.startswith('%s.streams' % video_id)] uploader = None artist = None @@ -207,7 +219,7 @@ class VevoIE(VevoBaseIE): formats = [] for video_version in video_versions: - version = self._VERSIONS.get(video_version['version']) + version = self._VERSIONS.get(video_version.get('version'), 'generic') version_url = video_version.get('url') if not version_url: continue @@ -339,7 +351,7 @@ class VevoPlaylistIE(VevoBaseIE): if video_id: return self.url_result('vevo:%s' % video_id, VevoIE.ie_key()) - playlists = self._extract_json(webpage, playlist_id, '%ss' % playlist_kind) + playlists = self._extract_json(webpage, playlist_id)['default']['%ss' % playlist_kind] playlist = (list(playlists.values())[0] if playlist_kind == 'playlist' else playlists[playlist_id]) From b3aec47665104223578181c71cc90112f5b17fce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Feb 2017 23:27:14 +0700 Subject: [PATCH 0381/1696] [tvigle] Raise GeoRestrictedError --- youtube_dl/extractor/tvigle.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tvigle.py 
b/youtube_dl/extractor/tvigle.py index f3817ab28..3475ef4c3 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -17,6 +17,9 @@ class TvigleIE(InfoExtractor): IE_DESC = 'Интернет-телевидение Tvigle.ru' _VALID_URL = r'https?://(?:www\.)?(?:tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$|cloud\.tvigle\.ru/video/(?P<id>\d+))' + _GEO_BYPASS = False + _GEO_COUNTRIES = ['RU'] + _TESTS = [ { 'url': 'http://www.tvigle.ru/video/sokrat/', @@ -72,8 +75,13 @@ class TvigleIE(InfoExtractor): error_message = item.get('errorMessage') if not videos and error_message: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, error_message), expected=True) + if item.get('isGeoBlocked') is True: + self.raise_geo_restricted( + msg=error_message, countries=self._GEO_COUNTRIES) + else: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, error_message), + expected=True) title = item['title'] description = item.get('description') From a5cf17989b04e559fda9a2731a3b33e881c5cc3c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 26 Feb 2017 17:24:54 +0800 Subject: [PATCH 0382/1696] [MDR] Relax _VALID_URL and playerURL matching and update _TESTS Ref: #12169 --- ChangeLog | 1 + youtube_dl/extractor/mdr.py | 20 ++++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 701afd57a..4009acf37 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors ++ [MDR] Recognize more URL patterns (#12169) * [vevo] Fix extraction for videos with the new streams/streamsV3 format (#11719) + [njpwworld] Add new extractor (#11561) diff --git a/youtube_dl/extractor/mdr.py b/youtube_dl/extractor/mdr.py index 6e4290aad..322e5b45a 100644 --- a/youtube_dl/extractor/mdr.py +++ b/youtube_dl/extractor/mdr.py @@ -14,7 +14,7 @@ from ..utils import ( class MDRIE(InfoExtractor): IE_DESC = 'MDR.DE and KiKA' - _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+-?(?P<id>\d+)(?:_.+?)?\.html' + _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html' _TESTS = [{ # MDR regularly deletes its videos @@ -31,6 +31,7 @@ class MDRIE(InfoExtractor): 'duration': 250, 'uploader': 'MITTELDEUTSCHER RUNDFUNK', }, + 'skip': '404 not found', }, { 'url': 'http://www.kika.de/baumhaus/videos/video19636.html', 'md5': '4930515e36b06c111213e80d1e4aad0e', @@ -41,6 +42,7 @@ class MDRIE(InfoExtractor): 'duration': 134, 'uploader': 'KIKA', }, + 'skip': '404 not found', }, { 'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html', 'md5': '5fe9c4dd7d71e3b238f04b8fdd588357', @@ -49,11 +51,21 @@ class MDRIE(InfoExtractor): 'ext': 'mp4', 'title': 'Beutolomäus und der geheime Weihnachtswunsch', 'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd', - 'timestamp': 1450950000, - 'upload_date': '20151224', + 'timestamp': 1482541200, + 'upload_date': '20161224', 'duration': 4628, 'uploader': 'KIKA', }, + }, { + # audio with alternative playerURL pattern + 'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html', + 'info_dict': { + 'id': '100', + 'ext': 'mp4', + 'title': 'Feature: Operation Mindfuck - Robert Anton Wilson', + 'duration': 3239, + 'uploader': 'MITTELDEUTSCHER RUNDFUNK', + }, }, { 'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html', 'only_matching': True, @@ -71,7 +83,7 @@ class MDRIE(InfoExtractor): webpage = self._download_webpage(url, video_id) 
data_url = self._search_regex( - r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1', + r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+?-avCustom\.xml)\1', webpage, 'data url', group='url').replace(r'\/', '/') doc = self._download_xml( From 8878789f1117b59186ecc6bf82f462201166a26a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Feb 2017 16:50:57 +0700 Subject: [PATCH 0383/1696] [dailymotion] Raise GeoRestrictedError --- youtube_dl/extractor/dailymotion.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index b312401dc..246efde43 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -282,9 +282,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor): } def _check_error(self, info): + error = info.get('error') if info.get('error') is not None: + title = error['title'] + # See https://developer.dailymotion.com/api#access-error + if error.get('code') == 'DM007': + self.raise_geo_restricted(msg=title) raise ExtractorError( - '%s said: %s' % (self.IE_NAME, info['error']['title']), expected=True) + '%s said: %s' % (self.IE_NAME, title), expected=True) def _get_subtitles(self, video_id, webpage): try: From fd5c4aab5958a2a086072488913cc190ff028bc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Feb 2017 16:51:21 +0700 Subject: [PATCH 0384/1696] [youtube] Raise GeoRestrictedError --- youtube_dl/extractor/youtube.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index dec02804b..b3c3cd5b2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -47,7 +47,6 @@ from ..utils import ( unsmuggle_url, uppercase_escape, urlencode_postdata, - ISO3166Utils, ) @@ -371,6 +370,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } _SUBTITLE_FORMATS = ('ttml', 'vtt') + _GEO_BYPASS = False + IE_NAME = 'youtube' _TESTS = [ { @@ -917,7 +918,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # itag 212 'url': '1t24XAntNCY', 'only_matching': True, - } + }, + { + # geo restricted to JP + 'url': 'sJL6WA-aGkQ', + 'only_matching': True, + }, ] def __init__(self, *args, **kwargs): @@ -1376,11 +1382,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if 'token' not in video_info: if 'reason' in video_info: if 'The uploader has not made this video available in your country.' 
in video_info['reason']: - regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None) - if regions_allowed: - raise ExtractorError('YouTube said: This video is available in %s only' % ( - ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))), - expected=True) + regions_allowed = self._html_search_meta( + 'regionsAllowed', video_webpage, default=None) + countries = regions_allowed.split(',') if regions_allowed else None + self.raise_geo_restricted( + msg=video_info['reason'][0], countries=countries) raise ExtractorError( 'YouTube said: %s' % video_info['reason'][0], expected=True, video_id=video_id) @@ -2126,6 +2132,10 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): 'id': 'UUs0ifCMCm1icqRbqhUINa0w', 'title': 'Uploads from Deus Ex', }, + }, { + # geo restricted to JP + 'url': 'https://www.youtube.com/user/kananishinoSMEJ', + 'only_matching': True, }] @classmethod From 7fd465525695bb589fa8932e1e36f38ad511735b Mon Sep 17 00:00:00 2001 From: xbe <xbe@users.noreply.github.com> Date: Sun, 26 Feb 2017 03:08:10 -0800 Subject: [PATCH 0385/1696] [crunchyroll] Extract uploader name that's not a link Provide the Crunchyroll extractor with the ability to extract uploader names that aren't links. Add a test for this new functionality. This fixes #12267. --- youtube_dl/extractor/crunchyroll.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index a1fc6a756..9c6cf00ca 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -207,6 +207,21 @@ class CrunchyrollIE(CrunchyrollBaseIE): # Just test metadata extraction 'skip_download': True, }, + }, { + # make sure we can extract an uploader name that's not a link + 'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899', + 'info_dict': { + 'id': '606899', + 'ext': 'mp4', + 'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors', + 'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"', + 'uploader': 'Geneon Entertainment', + 'upload_date': '20120717', + }, + 'params': { + # just test metadata extraction + 'skip_download': True, + }, }] _FORMAT_IDS = { @@ -388,8 +403,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text if video_upload_date: video_upload_date = unified_strdate(video_upload_date) video_uploader = self._html_search_regex( - r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage, - 'video_uploader', fatal=False) + # try looking for both an uploader that's a link and one that's not + [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'], + webpage, 'video_uploader', fatal=False) available_fmts = [] for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage): From fdeea72611b2c2c29a9a34e91ae4bd9b8dfc1f64 Mon Sep 17 00:00:00 2001 From: Alex Seiler <seileralex@gmail.com> Date: Sun, 26 Feb 2017 15:05:52 +0100 Subject: [PATCH 0386/1696] [cda] Decode URL (fixes #12255) --- youtube_dl/extractor/cda.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py index ae7af2f0e..1ee35b501 100755 --- a/youtube_dl/extractor/cda.py +++ b/youtube_dl/extractor/cda.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import codecs import re from 
.common import InfoExtractor @@ -96,6 +97,10 @@ class CDAIE(InfoExtractor): if not video or 'file' not in video: self.report_warning('Unable to extract %s version information' % version) return + if video['file'].startswith('uggc'): + video['file'] = codecs.decode(video['file'], 'rot_13') + if video['file'].endswith('adc.mp4'): + video['file'] = video['file'].replace('adc.mp4', '.mp4') f = { 'url': video['file'], } From 892b47ab6c9147e785c562229e6dab305fffba61 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 27 Feb 2017 21:34:33 +0800 Subject: [PATCH 0387/1696] [scivee] Remove extractor (#9315) The Wikipedia page is changed from active to down: https://en.wikipedia.org/w/index.php?title=SciVee&diff=prev&oldid=723161154 Some other interesting bits: $ nslookup www.scivee.tv Server: 8.8.8.8 Address: 8.8.8.8#53 Non-authoritative answer: www.scivee.tv canonical name = scivee.rcsb.org. Name: scivee.rcsb.org Address: 132.249.231.211 $ nslookup rcsb.org Server: 8.8.8.8 Address: 8.8.8.8#53 Non-authoritative answer: Name: rcsb.org Address: 132.249.231.77 Both IPs are from UCSD. I guess it's maintained by a lab and they don't maintain it anymore. --- ChangeLog | 1 + youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/scivee.py | 57 ------------------------------ 3 files changed, 1 insertion(+), 58 deletions(-) delete mode 100644 youtube_dl/extractor/scivee.py diff --git a/ChangeLog b/ChangeLog index 4009acf37..4ed9cb4e0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +- [scivee] Remove extractor (#9315) + [MDR] Recognize more URL patterns (#12169) * [vevo] Fix extraction for videos with the new streams/streamsV3 format (#11719) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 58139ee4e..d09104096 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -838,7 +838,6 @@ from .safari import ( from .sapo import SapoIE from .savefrom import SaveFromIE from .sbs import SBSIE -from .scivee import SciVeeIE from .screencast import ScreencastIE from .screencastomatic import ScreencastOMaticIE from .scrippsnetworks import ScrippsNetworksWatchIE diff --git a/youtube_dl/extractor/scivee.py b/youtube_dl/extractor/scivee.py deleted file mode 100644 index b1ca12fde..000000000 --- a/youtube_dl/extractor/scivee.py +++ /dev/null @@ -1,57 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import int_or_none - - -class SciVeeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?scivee\.tv/node/(?P<id>\d+)' - - _TEST = { - 'url': 'http://www.scivee.tv/node/62352', - 'md5': 'b16699b74c9e6a120f6772a44960304f', - 'info_dict': { - 'id': '62352', - 'ext': 'mp4', - 'title': 'Adam Arkin at the 2014 DOE JGI Genomics of Energy & Environment Meeting', - 'description': 'md5:81f1710638e11a481358fab1b11059d7', - }, - 'skip': 'Not accessible from Travis CI server', - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - # annotations XML is malformed - annotations = self._download_webpage( - 'http://www.scivee.tv/assets/annotations/%s' % video_id, video_id, 'Downloading annotations') - - title = self._html_search_regex(r'<title>([^<]+)', annotations, 'title') - description = self._html_search_regex(r'([^<]+)', annotations, 'abstract', fatal=False) - filesize = int_or_none(self._html_search_regex( - r'([^<]+)', annotations, 'filesize', fatal=False)) - - formats = [ - 
{ - 'url': 'http://www.scivee.tv/assets/audio/%s' % video_id, - 'ext': 'mp3', - 'format_id': 'audio', - }, - { - 'url': 'http://www.scivee.tv/assets/video/%s' % video_id, - 'ext': 'mp4', - 'format_id': 'video', - 'filesize': filesize, - }, - ] - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': 'http://www.scivee.tv/assets/videothumb/%s' % video_id, - 'formats': formats, - } From 0e879f432afe5b9a04a06cbc697cf28d08ac5518 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Feb 2017 22:22:43 +0700 Subject: [PATCH 0388/1696] [youtube:channel] Remove duplicate test --- youtube_dl/extractor/youtube.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b3c3cd5b2..7053e5512 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2132,10 +2132,6 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): 'id': 'UUs0ifCMCm1icqRbqhUINa0w', 'title': 'Uploads from Deus Ex', }, - }, { - # geo restricted to JP - 'url': 'https://www.youtube.com/user/kananishinoSMEJ', - 'only_matching': True, }] @classmethod @@ -2236,7 +2232,7 @@ class YoutubeUserIE(YoutubeChannelIE): 'url': 'https://www.youtube.com/gametrailers', 'only_matching': True, }, { - # This channel is not available. + # This channel is not available, geo restricted to JP 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos', 'only_matching': True, }] From 0dc5a86a329314f551f86c2ef3202342b7506667 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Feb 2017 22:43:19 +0700 Subject: [PATCH 0389/1696] [npo] Add support for hetklokhuis.nl (closes #12293) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/npo.py | 44 ++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d09104096..0910b7b05 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -669,6 +669,7 @@ from .npo import ( NPORadioIE, NPORadioFragmentIE, SchoolTVIE, + HetKlokhuisIE, VPROIE, WNLIE, ) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 962437145..09e8d9987 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -416,7 +416,21 @@ class NPORadioFragmentIE(InfoExtractor): } -class SchoolTVIE(InfoExtractor): +class NPODataMidEmbedIE(InfoExtractor): + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + r'data-mid=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video_id', group='id') + return { + '_type': 'url_transparent', + 'ie_key': 'NPO', + 'url': 'npo:%s' % video_id, + 'display_id': display_id + } + + +class SchoolTVIE(NPODataMidEmbedIE): IE_NAME = 'schooltv' _VALID_URL = r'https?://(?:www\.)?schooltv\.nl/video/(?P[^/?#&]+)' @@ -435,17 +449,25 @@ class SchoolTVIE(InfoExtractor): } } - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - video_id = self._search_regex( - r'data-mid=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video_id', group='id') - return { - '_type': 'url_transparent', - 'ie_key': 'NPO', - 'url': 'npo:%s' % video_id, - 'display_id': display_id + +class HetKlokhuisIE(NPODataMidEmbedIE): + IE_NAME = 'schooltv' + _VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P[^/?#&]+)' + + 
_TEST = { + 'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven', + 'info_dict': { + 'id': 'VPWON_1260528', + 'display_id': 'Zwaartekrachtsgolven', + 'ext': 'm4v', + 'title': 'Het Klokhuis: Zwaartekrachtsgolven', + 'description': 'md5:c94f31fb930d76c2efa4a4a71651dd48', + 'upload_date': '20170223', + }, + 'params': { + 'skip_download': True } + } class NPOPlaylistBaseIE(NPOIE): From f264c62334fdd31a7620b4fdefb822e1bae6bd77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Feb 2017 23:10:00 +0700 Subject: [PATCH 0390/1696] [npo] Add support for zapp.nl --- youtube_dl/extractor/npo.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 09e8d9987..7c2c93f27 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -51,7 +51,8 @@ class NPOIE(NPOBaseIE): (?: npo\.nl/(?!live|radio)(?:[^/]+/){2}| ntr\.nl/(?:[^/]+/){2,}| - omroepwnl\.nl/video/fragment/[^/]+__ + omroepwnl\.nl/video/fragment/[^/]+__| + zapp\.nl/[^/]+/(?:gemist|filmpjes)/ ) ) (?P[^/?#]+) @@ -140,6 +141,14 @@ class NPOIE(NPOBaseIE): 'upload_date': '20150508', 'duration': 462, }, + }, + { + 'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547', + 'only_matching': True, + }, + { + 'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118', + 'only_matching': True, } ] From dbc01fdb6f4b4c58469ffb75d00a179f5af5cdcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Feb 2017 23:10:29 +0700 Subject: [PATCH 0391/1696] [hetklokhuis] Fix IE_NAME --- youtube_dl/extractor/npo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 7c2c93f27..b53c29993 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -460,7 +460,7 @@ class SchoolTVIE(NPODataMidEmbedIE): class HetKlokhuisIE(NPODataMidEmbedIE): - IE_NAME = 'schooltv' + IE_NAME = 'hetklokhuis' _VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P[^/?#&]+)' _TEST = { From 18abb743762ce5b9b2ffd4d9d5e01b62621cc62e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Feb 2017 23:13:51 +0700 Subject: [PATCH 0392/1696] [npo] Relax _VALID_URL for zapp.nl --- youtube_dl/extractor/npo.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index b53c29993..50473d777 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -52,7 +52,7 @@ class NPOIE(NPOBaseIE): npo\.nl/(?!live|radio)(?:[^/]+/){2}| ntr\.nl/(?:[^/]+/){2,}| omroepwnl\.nl/video/fragment/[^/]+__| - zapp\.nl/[^/]+/(?:gemist|filmpjes)/ + zapp\.nl/[^/]+/[^/]+/ ) ) (?P[^/?#]+) @@ -149,6 +149,10 @@ class NPOIE(NPOBaseIE): { 'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118', 'only_matching': True, + }, + { + 'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990', + 'only_matching': True, } ] From c6184bcf7b58476b659a92290711e4c58faff277 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Feb 2017 23:24:03 +0700 Subject: [PATCH 0393/1696] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4ed9cb4e0..4cb897f7f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,11 +1,27 @@ version +Core +* [downloader/common] Limit displaying 2 digits after decimal point in sleep + interval message 
(#12183) ++ [extractor/common] Add preference to _parse_html5_media_entries + Extractors ++ [npo] Add support for zapp.nl ++ [npo] Add support for hetklokhuis.nl (#12293) - [scivee] Remove extractor (#9315) -+ [MDR] Recognize more URL patterns (#12169) ++ [cda] Decode download URL (#12255) ++ [crunchyroll] Improve uploader extraction (#12267) ++ [youtube] Raise GeoRestrictedError ++ [dailymotion] Raise GeoRestrictedError ++ [mdr] Recognize more URL patterns (#12169) ++ [tvigle] Raise GeoRestrictedError * [vevo] Fix extraction for videos with the new streams/streamsV3 format (#11719) -+ [njpwworld] Add new extractor (#11561) ++ [freshlive] Add support for freshlive.tv (#12175) ++ [xhamster] Capture and output videoClosed error (#12263) ++ [etonline] Add support for etonline.com (#12236) ++ [njpwworld] Add support for njpwworld.com (#11561) +* [amcnetworks] Relax URL regular expression (#12127) version 2017.02.24.1 From ef48a1175dc4e28b07c55ae7277d8196abec7ace Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Feb 2017 23:26:07 +0700 Subject: [PATCH 0394/1696] release 2017.02.27 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 5 ++++- youtube_dl/version.py | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 564108122..6374f7c25 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.24.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.24.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.27** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.24.1 +[debug] youtube-dl version 2017.02.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 4cb897f7f..949ea1810 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.02.27 Core * [downloader/common] Limit displaying 2 digits after decimal point in sleep diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f97397331..1b01c6d9d 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -239,6 +239,7 @@ - **ESPN** - **ESPNArticle** - **EsriVideo** + - **ETOnline** - **Europa** - **EveryonesMixtape** - **ExpoTV** @@ -274,6 +275,7 @@ - **francetvinfo.fr** - **Freesound** - **freespeech.org** + - **FreshLive** - **Funimation** - **FunnyOrDie** - **Fusion** @@ -310,6 +312,7 @@ - **HellPorno** - **Helsinki**: helsinki.fi - **HentaiStigma** + - **hetklokhuis** - **hgtv.com:show** - **HistoricFilms** - **history:topic**: History.com Topic @@ -511,6 +514,7 @@ - **Nintendo** - **njoy**: N-JOY - **njoy:embed** + - **NJPWWorld**: 新日本プロレスワールド - **NobelPrize** - **Noco** - **Normalboots** @@ -666,7 +670,6 @@ - **savefrom.net** - **SBS**: sbs.com.au - **schooltv** - - **SciVee** - **screen.yahoo:search**: Yahoo screen search - **Screencast** - **ScreencastOMatic** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index fe7462eac..261218b80 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.24.1' +__version__ = '2017.02.27' From f4c68ba372655c8ff4d6b1cfecfa129796159afd Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 28 Feb 2017 21:40:22 +0800 Subject: [PATCH 0395/1696] [douyu] Fix extraction and update _TESTS They've switched from flv to hls Closes #12301 --- ChangeLog | 6 ++++ youtube_dl/extractor/douyutv.py | 53 ++++++--------------------------- 2 files changed, 15 insertions(+), 44 deletions(-) diff --git a/ChangeLog b/ChangeLog index 949ea1810..f9986ae61 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [douyu] Fix extraction (#12301) + + version 2017.02.27 Core diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index 911594413..9a83fb31a 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -1,15 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals -import hashlib -import time -import uuid - from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) from ..utils import ( ExtractorError, unescapeHTML, @@ -24,8 +16,8 @@ class DouyuTVIE(InfoExtractor): 'info_dict': { 'id': '17732', 'display_id': 'iseven', - 'ext': 'flv', - 'title': 're:^清晨醒脑!T-ara根本停不下来! 
[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'ext': 'mp4', + 'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': r're:.*m7show@163\.com.*', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': '7师傅', @@ -39,7 +31,7 @@ class DouyuTVIE(InfoExtractor): 'info_dict': { 'id': '85982', 'display_id': '85982', - 'ext': 'flv', + 'ext': 'mp4', 'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': 'md5:746a2f7a253966a06755a912f0acc0d2', 'thumbnail': r're:^https?://.*\.jpg$', @@ -55,8 +47,8 @@ class DouyuTVIE(InfoExtractor): 'info_dict': { 'id': '17732', 'display_id': '17732', - 'ext': 'flv', - 'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'ext': 'mp4', + 'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': r're:.*m7show@163\.com.*', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': '7师傅', @@ -96,45 +88,18 @@ class DouyuTVIE(InfoExtractor): if room.get('show_status') == '2': raise ExtractorError('Live stream is offline', expected=True) - tt = compat_str(int(time.time() / 60)) - did = uuid.uuid4().hex.upper() - - sign_content = ''.join((room_id, did, self._API_KEY, tt)) - sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest() - - flv_data = compat_urllib_parse_urlencode({ - 'cdn': 'ws', - 'rate': '0', - 'tt': tt, - 'did': did, - 'sign': sign, - }) - - video_info = self._download_json( - 'http://www.douyu.com/lapi/live/getPlay/%s' % room_id, video_id, - data=flv_data, note='Downloading video info', - headers={'Content-Type': 'application/x-www-form-urlencoded'}) - - error_code = video_info.get('error', 0) - if error_code is not 0: - raise ExtractorError( - '%s reported error %i' % (self.IE_NAME, error_code), - expected=True) - - base_url = video_info['data']['rtmp_url'] - live_path = video_info['data']['rtmp_live'] - - video_url = '%s/%s' % (base_url, live_path) + formats = self._extract_m3u8_formats( + room['hls_url'], video_id, ext='mp4') title = self._live_title(unescapeHTML(room['room_name'])) - description = room.get('notice') + description = room.get('show_details') thumbnail = room.get('room_src') uploader = room.get('nickname') return { 'id': room_id, 'display_id': video_id, - 'url': video_url, + 'formats': formats, 'title': title, 'description': description, 'thumbnail': thumbnail, From c9619f0a17927086c49e4b443202be296d734a76 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 27 Feb 2017 18:47:47 +0800 Subject: [PATCH 0396/1696] [aes] Add aes_cbc_encrypt Used in daisuki.net (#4738) --- test/test_aes.py | 9 ++++++++- youtube_dl/aes.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/test/test_aes.py b/test/test_aes.py index 54078a66d..78a28751b 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -8,7 +8,7 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text +from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes import base64 @@ -34,6 +34,13 @@ class TestAES(unittest.TestCase): decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) + def test_cbc_encrypt(self): + data = bytes_to_intlist(self.secret_msg) + encrypted = 
intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv)) + self.assertEqual( + encrypted, + b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd") + def test_decrypt_text(self): password = intlist_to_bytes(self.key).decode('utf-8') encrypted = base64.b64encode( diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index b8ff45481..c5bb3c4ef 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -60,6 +60,34 @@ def aes_cbc_decrypt(data, key, iv): return decrypted_data +def aes_cbc_encrypt(data, key, iv): + """ + Encrypt with aes in CBC mode. Using PKCS#7 padding + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv 16-Byte IV + @returns {int[]} encrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + previous_cipher_block = iv + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + remaining_length = BLOCK_SIZE_BYTES - len(block) + block += [remaining_length] * remaining_length + mixed_block = xor(block, previous_cipher_block) + + encrypted_block = aes_encrypt(mixed_block, expanded_key) + encrypted_data += encrypted_block + + previous_cipher_block = encrypted_block + + return encrypted_data + + def key_expansion(data): """ Generate key schedule From f48409c7ac186fa38bbeb2df2b210e37a18eb04b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 27 Feb 2017 18:50:19 +0800 Subject: [PATCH 0397/1696] [utils] Add pkcs1pad Used in daisuki.net (#4738) --- test/test_utils.py | 9 +++++++++ youtube_dl/utils.py | 15 +++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 3cdb21d40..aefd94518 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -52,6 +52,7 @@ from youtube_dl.utils import ( parse_filesize, parse_count, parse_iso8601, + pkcs1pad, read_batch_urls, sanitize_filename, sanitize_path, @@ -1104,6 +1105,14 @@ The first line ohdave_rsa_encrypt(b'aa111222', e, N), '726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881') + def test_pkcs1pad(self): + data = [1, 2, 3] + padded_data = pkcs1pad(data, 32) + self.assertEqual(padded_data[:2], [0, 2]) + self.assertEqual(padded_data[28:], [0, 1, 2, 3]) + + self.assertRaises(ValueError, pkcs1pad, data, 8) + def test_encode_base_n(self): self.assertEqual(encode_base_n(0, 30), '0') self.assertEqual(encode_base_n(80, 30), '2k') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 17b83794a..8bd075eaf 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3336,6 +3336,21 @@ def ohdave_rsa_encrypt(data, exponent, modulus): return '%x' % encrypted +def pkcs1pad(data, length): + """ + Padding input data with PKCS#1 scheme + + @param {int[]} data input data + @param {int} length target length + @returns {int[]} padded data + """ + if len(data) > length - 11: + raise ValueError('Input data too long for PKCS#1 padding') + + pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)] + return [0, 2] + pseudo_random + [0] + data + + def encode_base_n(num, n, table=None): FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' if not table: From 0a5445ddbeb8d391bbac92e5fe9074c6aa2e1565 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 28 Feb 2017 19:16:55 +0800 Subject: [PATCH 0398/1696] [utils] Add bytes_to_long() and long_to_bytes() Used in daisuki.net (#4738) 
Both are adapted from public domain PyCrypto: https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py --- youtube_dl/utils.py | 51 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 8bd075eaf..807183f4a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3319,6 +3319,57 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): self, req, proxy, type) +# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is +# released into Public Domain +# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387 + +def long_to_bytes(n, blocksize=0): + """long_to_bytes(n:long, blocksize:int) : string + Convert a long integer to a byte string. + + If optional blocksize is given and greater than zero, pad the front of the + byte string with binary zeros so that the length is a multiple of + blocksize. + """ + # after much testing, this algorithm was deemed to be the fastest + s = b'' + n = int(n) + while n > 0: + s = compat_struct_pack('>I', n & 0xffffffff) + s + n = n >> 32 + # strip off leading zeros + for i in range(len(s)): + if s[i] != b'\000'[0]: + break + else: + # only happens when n == 0 + s = b'\000' + i = 0 + s = s[i:] + # add back some pad bytes. this could be done more efficiently w.r.t. the + # de-padding being done above, but sigh... + if blocksize > 0 and len(s) % blocksize: + s = (blocksize - len(s) % blocksize) * b'\000' + s + return s + + +def bytes_to_long(s): + """bytes_to_long(string) : long + Convert a byte string to a long integer. + + This is (essentially) the inverse of long_to_bytes(). + """ + acc = 0 + length = len(s) + if length % 4: + extra = (4 - length % 4) + s = b'\000' * extra + s + length = length + extra + for i in range(0, length, 4): + acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0] + return acc + + def ohdave_rsa_encrypt(data, exponent, modulus): ''' Implement OHDave's RSA algorithm. 
See http://www.ohdave.com/rsa/ From 9bd05b5a18c535f5517158d505af5dced498d23e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 27 Feb 2017 18:56:45 +0800 Subject: [PATCH 0399/1696] [daisuki] Add new extractor (closes #4738) --- ChangeLog | 1 + youtube_dl/extractor/daisuki.py | 144 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 4 + 3 files changed, 149 insertions(+) create mode 100644 youtube_dl/extractor/daisuki.py diff --git a/ChangeLog b/ChangeLog index f9986ae61..401c5885e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [daisuki] Add new extractor (#2486, #3186, #4738, #6175, #7776, #10060) * [douyu] Fix extraction (#12301) diff --git a/youtube_dl/extractor/daisuki.py b/youtube_dl/extractor/daisuki.py new file mode 100644 index 000000000..6fd09faf1 --- /dev/null +++ b/youtube_dl/extractor/daisuki.py @@ -0,0 +1,144 @@ +from __future__ import unicode_literals + +import base64 +import json +import random +import re + +from .common import InfoExtractor +from ..aes import ( + aes_cbc_decrypt, + aes_cbc_encrypt, +) +from ..utils import ( + bytes_to_intlist, + bytes_to_long, + clean_html, + ExtractorError, + intlist_to_bytes, + get_element_by_id, + js_to_json, + int_or_none, + long_to_bytes, + pkcs1pad, + remove_end, +) + + +class DaisukiIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?daisuki\.net/[^/]+/[^/]+/[^/]+/watch\.[^.]+\.(?P\d+)\.html' + + _TEST = { + 'url': 'http://www.daisuki.net/tw/en/anime/watch.TheIdolMasterCG.11213.html', + 'info_dict': { + 'id': '11213', + 'ext': 'mp4', + 'title': '#01 Who is in the pumpkin carriage? - THE IDOLM@STER CINDERELLA GIRLS', + 'creator': 'BANDAI NAMCO Entertainment', + }, + 'params': { + 'skip_download': True, # AES-encrypted HLS stream + }, + } + + # The public key in PEM format can be found in clientlibs_anime_watch.min.js + _RSA_KEY = (0xc5524c25e8e14b366b3754940beeb6f96cb7e2feef0b932c7659a0c5c3bf173d602464c2df73d693b513ae06ff1be8f367529ab30bf969c5640522181f2a0c51ea546ae120d3d8d908595e4eff765b389cde080a1ef7f1bbfb07411cc568db73b7f521cedf270cbfbe0ddbc29b1ac9d0f2d8f4359098caffee6d07915020077d, 65537) + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + flashvars = self._parse_json(self._search_regex( + r'(?s)var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'), + video_id, transform_source=js_to_json) + + iv = [0] * 16 + + data = {} + for key in ('device_cd', 'mv_id', 'ss1_prm', 'ss2_prm', 'ss3_prm', 'ss_id'): + data[key] = flashvars.get(key, '') + + encrypted_rtn = None + + # Some AES keys are rejected. 
Try it with different AES keys + for idx in range(5): + aes_key = [random.randint(0, 254) for _ in range(32)] + padded_aeskey = intlist_to_bytes(pkcs1pad(aes_key, 128)) + + n, e = self._RSA_KEY + encrypted_aeskey = long_to_bytes(pow(bytes_to_long(padded_aeskey), e, n)) + init_data = self._download_json('http://www.daisuki.net/bin/bgn/init', video_id, query={ + 's': flashvars.get('s', ''), + 'c': flashvars.get('ss3_prm', ''), + 'e': url, + 'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt( + bytes_to_intlist(json.dumps(data)), + aes_key, iv))).decode('ascii'), + 'a': base64.b64encode(encrypted_aeskey).decode('ascii'), + }, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else '')) + + if 'rtn' in init_data: + encrypted_rtn = init_data['rtn'] + break + + self._sleep(5, video_id) + + if encrypted_rtn is None: + raise ExtractorError('Failed to fetch init data') + + rtn = self._parse_json( + intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist( + base64.b64decode(encrypted_rtn)), + aes_key, iv)).decode('utf-8').rstrip('\0'), + video_id) + + formats = self._extract_m3u8_formats( + rtn['play_url'], video_id, ext='mp4', entry_protocol='m3u8_native') + + title = remove_end(self._og_search_title(webpage), ' - DAISUKI') + + creator = self._html_search_regex( + r'Creator\s*:\s*([^<]+)', webpage, 'creator', fatal=False) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'creator': creator, + } + + +class DaisukiPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)daisuki\.net/[^/]+/[^/]+/[^/]+/detail\.(?P[a-zA-Z0-9]+)\.html' + + _TEST = { + 'url': 'http://www.daisuki.net/tw/en/anime/detail.TheIdolMasterCG.html', + 'info_dict': { + 'id': 'TheIdolMasterCG', + 'title': 'THE IDOLM@STER CINDERELLA GIRLS', + 'description': 'md5:0f2c028a9339f7a2c7fbf839edc5c5d8', + }, + 'playlist_count': 26, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + episode_pattern = r'''(?sx) + ]+delay="[^"]+/(\d+)/movie\.jpg".+? + ]+class=".*?\bepisodeNumber\b.*?">(?:]+>)?([^<]+)''' + entries = [{ + '_type': 'url_transparent', + 'url': url.replace('detail', 'watch').replace('.html', '.' 
+ movie_id + '.html'), + 'episode_id': episode_id, + 'episode_number': int_or_none(episode_id), + } for movie_id, episode_id in re.findall(episode_pattern, webpage)] + + playlist_title = remove_end( + self._og_search_title(webpage, fatal=False), ' - Anime - DAISUKI') + playlist_description = clean_html(get_element_by_id('synopsisTxt', webpage)) + + return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0910b7b05..e251d8478 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -227,6 +227,10 @@ from .dailymotion import ( DailymotionUserIE, DailymotionCloudIE, ) +from .daisuki import ( + DaisukiIE, + DaisukiPlaylistIE, +) from .daum import ( DaumIE, DaumClipIE, From 7c4aa6fd6fd6fadf1cf1942c279cd5c0ff5ae498 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 28 Feb 2017 22:29:01 +0800 Subject: [PATCH 0400/1696] [daisuki] Add subtitles (#4738) --- youtube_dl/extractor/daisuki.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/youtube_dl/extractor/daisuki.py b/youtube_dl/extractor/daisuki.py index 6fd09faf1..58cc98666 100644 --- a/youtube_dl/extractor/daisuki.py +++ b/youtube_dl/extractor/daisuki.py @@ -34,6 +34,11 @@ class DaisukiIE(InfoExtractor): 'id': '11213', 'ext': 'mp4', 'title': '#01 Who is in the pumpkin carriage? - THE IDOLM@STER CINDERELLA GIRLS', + 'subtitles': { + 'mul': [{ + 'ext': 'ttml', + }], + }, 'creator': 'BANDAI NAMCO Entertainment', }, 'params': { @@ -101,10 +106,20 @@ class DaisukiIE(InfoExtractor): creator = self._html_search_regex( r'Creator\s*:\s*([^<]+)', webpage, 'creator', fatal=False) + subtitles = {} + caption_url = rtn.get('caption_url') + if caption_url: + # mul: multiple languages + subtitles['mul'] = [{ + 'url': caption_url, + 'ext': 'ttml', + }] + return { 'id': video_id, 'title': title, 'formats': formats, + 'subtitles': subtitles, 'creator': creator, } From 87dadd456a138c3107ff6254bd03ed832cf2e6d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 28 Feb 2017 23:06:47 +0700 Subject: [PATCH 0401/1696] [youtube:playlist] Recognize another playlist pattern (closes #11928, closes #12286) --- youtube_dl/extractor/youtube.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 7053e5512..81c793921 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1851,7 +1851,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): (?: youtube\.com/ (?: - (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries) + (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11})) \? (?:.*?[&;])*? 
(?:p|a|list)= | p/ )| @@ -1924,6 +1924,13 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'title': 'JODA15', 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', } + }, { + 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', + 'playlist_mincount': 485, + 'info_dict': { + 'title': '2017 華語最新單曲 (2/24更新)', + 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', + } }, { 'note': 'Embedded SWF player', 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0', @@ -2072,7 +2079,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): # Check if it's a video-specific URL query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) video_id = query_dict.get('v', [None])[0] or self._search_regex( - r'(?:^|//)youtu\.be/([0-9A-Za-z_-]{11})', url, + r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url, 'video id', default=None) if video_id: if self._downloader.params.get('noplaylist'): From 948519b35dec420c3b3ca5369bcba1dead31fcc6 Mon Sep 17 00:00:00 2001 From: Alex Seiler Date: Fri, 17 Feb 2017 04:49:25 +0100 Subject: [PATCH 0402/1696] [azmedien:showplaylist] Add support for all episodes playlists --- youtube_dl/extractor/azmedien.py | 49 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 50 insertions(+) diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py index cbc3ed564..817fc6d57 100644 --- a/youtube_dl/extractor/azmedien.py +++ b/youtube_dl/extractor/azmedien.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from .kaltura import KalturaIE from ..utils import ( + get_element_by_class, get_element_by_id, strip_or_none, urljoin, @@ -170,3 +171,51 @@ class AZMedienPlaylistIE(AZMedienBaseIE): 'video-title', webpage)), group='title') return self.playlist_result(entries, show_id, title) + + +class AZMedienShowPlaylistIE(AZMedienBaseIE): + IE_DESC = 'AZ Medien Show playlists' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? 
+ (?P + (?: + telezueri\.ch| + telebaern\.tv| + telem1\.ch + )/ + (?: + all-episodes| + alle-episoden + ) + /[^/]+ + ) + ''' + + _TEST = { + 'url': 'http://www.telezueri.ch/all-episodes/astrotalk', + 'info_dict': { + 'id': 'telezueri.ch/all-episodes/astrotalk', + 'title': 'TeleZüri: AstroTalk - alle episoden', + 'description': 'md5:4c0f7e7d741d906004266e295ceb4a26', + }, + 'playlist_mincount': 13, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + episodes = get_element_by_class('search-mobile-box', webpage) + entries = [self.url_result( + urljoin(url, m.group('url'))) for m in re.finditer( + r']+href=(["\'])(?P.+?)\1', episodes)] + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + return self.playlist_result( + entries, + playlist_id=playlist_id, + playlist_title=title, + playlist_description=description) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e251d8478..b1613a9d3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -83,6 +83,7 @@ from .awaan import ( from .azmedien import ( AZMedienIE, AZMedienPlaylistIE, + AZMedienShowPlaylistIE, ) from .azubu import AzubuIE, AzubuLiveIE from .baidu import BaiduVideoIE From 43b38424a9ca2ce962036b17462d59b8acbf6dd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 28 Feb 2017 23:37:33 +0700 Subject: [PATCH 0403/1696] [azmedien:showplaylist] Improve (closes #12160) --- youtube_dl/extractor/azmedien.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py index 817fc6d57..f4e07d901 100644 --- a/youtube_dl/extractor/azmedien.py +++ b/youtube_dl/extractor/azmedien.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import re @@ -174,11 +175,10 @@ class AZMedienPlaylistIE(AZMedienBaseIE): class AZMedienShowPlaylistIE(AZMedienBaseIE): - IE_DESC = 'AZ Medien Show playlists' + IE_DESC = 'AZ Medien show playlists' _VALID_URL = r'''(?x) https?:// (?:www\.)? 
- (?P (?: telezueri\.ch| telebaern\.tv| @@ -187,22 +187,18 @@ class AZMedienShowPlaylistIE(AZMedienBaseIE): (?: all-episodes| alle-episoden - ) - /[^/]+ - ) + )/ + (?P[^/?#&]+) ''' _TEST = { 'url': 'http://www.telezueri.ch/all-episodes/astrotalk', 'info_dict': { - 'id': 'telezueri.ch/all-episodes/astrotalk', + 'id': 'astrotalk', 'title': 'TeleZüri: AstroTalk - alle episoden', 'description': 'md5:4c0f7e7d741d906004266e295ceb4a26', }, 'playlist_mincount': 13, - 'params': { - 'skip_download': True, - } } def _real_extract(self, url): @@ -211,11 +207,7 @@ class AZMedienShowPlaylistIE(AZMedienBaseIE): episodes = get_element_by_class('search-mobile-box', webpage) entries = [self.url_result( urljoin(url, m.group('url'))) for m in re.finditer( - r']+href=(["\'])(?P.+?)\1', episodes)] - title = self._og_search_title(webpage) + r']+href=(["\'])(?P(?:(?!\1).)+)\1', episodes)] + title = self._og_search_title(webpage, fatal=False) description = self._og_search_description(webpage) - return self.playlist_result( - entries, - playlist_id=playlist_id, - playlist_title=title, - playlist_description=description) + return self.playlist_result(entries, playlist_id, title, description) From 11bae9cdde8ed75b355d5e68ef57dae504ad94ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 28 Feb 2017 23:49:24 +0700 Subject: [PATCH 0404/1696] [ChangeLog] Actualize --- ChangeLog | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 401c5885e..8d33c055b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,15 @@ version +Core ++ [utils] Add bytes_to_long and long_to_bytes ++ [utils] Add pkcs1pad ++ [aes] Add aes_cbc_encrypt + Extractors -+ [daisuki] Add new extractor (#2486, #3186, #4738, #6175, #7776, #10060) ++ [azmedien:showplaylist] Add support for show playlists (#12160) ++ [youtube:playlist] Recognize another playlist pattern (#11928, #12286) ++ [daisuki] Add support for daisuki.net (#2486, #3186, #4738, #6175, #7776, + #10060) * [douyu] Fix extraction (#12301) From 1dc24093f81f349c22e6bda13cb05c26ac3266f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 28 Feb 2017 23:59:22 +0700 Subject: [PATCH 0405/1696] release 2017.02.28 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 6374f7c25..c907ef931 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.27** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.28** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.27 +[debug] youtube-dl version 2017.02.28 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 8d33c055b..2a1fd9018 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.02.28 Core + [utils] Add bytes_to_long and long_to_bytes diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1b01c6d9d..a08e00fce 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -78,6 +78,7 @@ - **awaan:video** - **AZMedien**: AZ Medien videos - **AZMedienPlaylist**: AZ Medien playlists + - **AZMedienShowPlaylist**: AZ Medien show playlists - **Azubu** - **AzubuLive** - **BaiduVideo**: 百度视频 @@ -191,6 +192,8 @@ - **dailymotion:playlist** - **dailymotion:user** - **DailymotionCloud** + - **Daisuki** + - **DaisukiPlaylist** - **daum.net** - **daum.net:clip** - **daum.net:playlist** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 261218b80..340e23bf8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.27' +__version__ = '2017.02.28' From aa9cc2ecbfea6c82944b4e07f3e93c904f1ff421 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 1 Mar 2017 05:03:14 +0700 Subject: [PATCH 0406/1696] [npo] Adapt to app.php API (closes #12311) --- youtube_dl/extractor/npo.py | 79 ++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 44 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 50473d777..89082c189 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -17,27 +17,9 @@ from ..utils import ( class NPOBaseIE(InfoExtractor): def _get_token(self, video_id): - token_page = self._download_webpage( - 'http://ida.omroep.nl/npoplayer/i.js', - video_id, note='Downloading token') - token = self._search_regex( - r'npoplayer\.token = "(.+?)"', token_page, 'token') - # Decryption algorithm extracted from http://npoplayer.omroep.nl/csjs/npoplayer-min.js - token_l = list(token) - first = second = None - for i in range(5, len(token_l) - 4): - if token_l[i].isdigit(): - if first is None: - first = i - elif second is None: - second = i - if first is None or second is None: - first = 12 - second = 13 - - token_l[first], token_l[second] = token_l[second], token_l[first] - - return ''.join(token_l) + return self._download_json( + 'http://ida.omroep.nl/app.php/auth', video_id, + note='Downloading token')['token'] class NPOIE(NPOBaseIE): @@ -187,32 +169,41 @@ class NPOIE(NPOBaseIE): pubopties = metadata.get('pubopties') if pubopties: quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std']) - for format_id in pubopties: - format_info = self._download_json( - 
'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' - % (video_id, format_id, token), - video_id, 'Downloading %s JSON' % format_id) - if format_info.get('error_code', 0) or format_info.get('errorcode', 0): + items = self._download_json( + 'http://ida.omroep.nl/app.php/%s' % video_id, + 'Downloading formats JSON', query={ + 'adaptive': 'yes', + 'token': token, + })['items'][0] + for num, item in enumerate(items): + item_url = item.get('url') + if not item_url: continue - streams = format_info.get('streams') - if streams: - try: - video_info = self._download_json( - streams[0] + '&type=json', - video_id, 'Downloading %s stream JSON' % format_id) - except ExtractorError as ee: - if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: - error = (self._parse_json(ee.cause.read().decode(), video_id, fatal=False) or {}).get('errorstring') - if error: - raise ExtractorError(error, expected=True) - raise - else: - video_info = format_info - video_url = video_info.get('url') + format_id = self._search_regex( + r'video/ida/([^/]+)', item_url, 'format id', + default=None) + try: + stream_info = self._download_json( + item_url + '&type=json', video_id, + 'Downloading %s stream JSON' % item.get('label') or format_id or num) + except ExtractorError as ee: + if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: + error = (self._parse_json( + ee.cause.read().decode(), video_id, + fatal=False) or {}).get('errorstring') + if error: + raise ExtractorError(error, expected=True) + raise + if stream_info.get('error_code', 0) or stream_info.get('errorcode', 0): + continue + video_url = stream_info.get('url') if not video_url: continue - if format_id == 'adaptive': - formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4')) + if stream_info.get('family') == 'adaptive': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) else: formats.append({ 'url': video_url, From 83e8fce628e810e2a5639ef9a21be839526512fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 1 Mar 2017 22:14:46 +0700 Subject: [PATCH 0407/1696] [npo] Improve extraction and update tests --- youtube_dl/extractor/npo.py | 402 ++++++++++++++++++------------------ 1 file changed, 197 insertions(+), 205 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 89082c189..be10fc486 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -3,15 +3,19 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_HTTPError +from ..compat import ( + compat_HTTPError, + compat_str, +) from ..utils import ( + determine_ext, + ExtractorError, fix_xml_ampersands, orderedSet, parse_duration, qualities, strip_jsonp, unified_strdate, - ExtractorError, ) @@ -40,103 +44,101 @@ class NPOIE(NPOBaseIE): (?P[^/?#]+) ''' - _TESTS = [ - { - 'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719', - 'md5': '4b3f9c429157ec4775f2c9cb7b911016', - 'info_dict': { - 'id': 'VPWON_1220719', - 'ext': 'm4v', - 'title': 'Nieuwsuur', - 'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.', - 'upload_date': '20140622', - }, + _TESTS = [{ + 'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719', + 'md5': '4b3f9c429157ec4775f2c9cb7b911016', + 'info_dict': { + 'id': 'VPWON_1220719', + 'ext': 'm4v', + 'title': 'Nieuwsuur', + 'description': 'Dagelijks tussen tien en elf: nieuws, 
sport en achtergronden.', + 'upload_date': '20140622', }, - { - 'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800', - 'md5': 'da50a5787dbfc1603c4ad80f31c5120b', - 'info_dict': { - 'id': 'VARA_101191800', - 'ext': 'm4v', - 'title': 'De Mega Mike & Mega Thomas show: The best of.', - 'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4', - 'upload_date': '20090227', - 'duration': 2400, - }, + }, { + 'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800', + 'md5': 'da50a5787dbfc1603c4ad80f31c5120b', + 'info_dict': { + 'id': 'VARA_101191800', + 'ext': 'm4v', + 'title': 'De Mega Mike & Mega Thomas show: The best of.', + 'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4', + 'upload_date': '20090227', + 'duration': 2400, }, - { - 'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289', - 'md5': 'f8065e4e5a7824068ed3c7e783178f2c', - 'info_dict': { - 'id': 'VPWON_1169289', - 'ext': 'm4v', - 'title': 'Tegenlicht: De toekomst komt uit Afrika', - 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea', - 'upload_date': '20130225', - 'duration': 3000, - }, + }, { + 'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289', + 'md5': 'f8065e4e5a7824068ed3c7e783178f2c', + 'info_dict': { + 'id': 'VPWON_1169289', + 'ext': 'm4v', + 'title': 'Tegenlicht: Zwart geld. De toekomst komt uit Afrika', + 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea', + 'upload_date': '20130225', + 'duration': 3000, }, - { - 'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706', - 'info_dict': { - 'id': 'WO_VPRO_043706', - 'ext': 'wmv', - 'title': 'De nieuwe mens - Deel 1', - 'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b', - 'duration': 4680, - }, - 'params': { - # mplayer mms download - 'skip_download': True, - } + }, { + 'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706', + 'info_dict': { + 'id': 'WO_VPRO_043706', + 'ext': 'm4v', + 'title': 'De nieuwe mens - Deel 1', + 'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b', + 'duration': 4680, }, + 'params': { + 'skip_download': True, + } + }, { # non asf in streams - { - 'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771', - 'md5': 'b3da13de374cbe2d5332a7e910bef97f', - 'info_dict': { - 'id': 'WO_NOS_762771', - 'ext': 'mp4', - 'title': 'Hoe gaat Europa verder na Parijs?', - }, - }, - { - 'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content', - 'md5': '01c6a2841675995da1f0cf776f03a9c3', - 'info_dict': { - 'id': 'VPWON_1233944', - 'ext': 'm4v', - 'title': 'Aap, poot, pies', - 'description': 'md5:c9c8005d1869ae65b858e82c01a91fde', - 'upload_date': '20150508', - 'duration': 599, - }, - }, - { - 'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698', - 'md5': 'd30cd8417b8b9bca1fdff27428860d08', - 'info_dict': { - 'id': 'POW_00996502', - 'ext': 'm4v', - 'title': '''"Dit is wel een 'landslide'..."''', - 'description': 'md5:f8d66d537dfb641380226e31ca57b8e8', - 'upload_date': '20150508', - 'duration': 462, - }, + 'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771', + 'info_dict': { + 'id': 'WO_NOS_762771', + 'ext': 'mp4', + 'title': 'Hoe gaat Europa verder na Parijs?', }, - { - 'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547', - 'only_matching': True, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content', + 'info_dict': { + 'id': 
'VPWON_1233944', + 'ext': 'm4v', + 'title': 'Aap, poot, pies', + 'description': 'md5:c9c8005d1869ae65b858e82c01a91fde', + 'upload_date': '20150508', + 'duration': 599, }, - { - 'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118', - 'only_matching': True, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698', + 'info_dict': { + 'id': 'POW_00996502', + 'ext': 'm4v', + 'title': '''"Dit is wel een 'landslide'..."''', + 'description': 'md5:f8d66d537dfb641380226e31ca57b8e8', + 'upload_date': '20150508', + 'duration': 462, }, - { - 'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990', - 'only_matching': True, + 'params': { + 'skip_download': True, } - ] + }, { + 'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547', + 'only_matching': True, + }, { + 'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118', + 'only_matching': True, + }, { + 'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990', + 'only_matching': True, + }, { + # live stream + 'url': 'npo:LI_NL1_4188102', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -165,79 +167,115 @@ class NPOIE(NPOBaseIE): token = self._get_token(video_id) formats = [] + urls = set() + + quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std']) + items = self._download_json( + 'http://ida.omroep.nl/app.php/%s' % video_id, video_id, + 'Downloading formats JSON', query={ + 'adaptive': 'yes', + 'token': token, + })['items'][0] + for num, item in enumerate(items): + item_url = item.get('url') + if not item_url or item_url in urls: + continue + urls.add(item_url) + format_id = self._search_regex( + r'video/ida/([^/]+)', item_url, 'format id', + default=None) + + def add_format_url(format_url): + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'quality': quality(format_id), + }) - pubopties = metadata.get('pubopties') - if pubopties: - quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std']) - items = self._download_json( - 'http://ida.omroep.nl/app.php/%s' % video_id, - 'Downloading formats JSON', query={ - 'adaptive': 'yes', - 'token': token, - })['items'][0] - for num, item in enumerate(items): - item_url = item.get('url') - if not item_url: - continue - format_id = self._search_regex( - r'video/ida/([^/]+)', item_url, 'format id', - default=None) - try: - stream_info = self._download_json( - item_url + '&type=json', video_id, - 'Downloading %s stream JSON' % item.get('label') or format_id or num) - except ExtractorError as ee: - if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: - error = (self._parse_json( - ee.cause.read().decode(), video_id, - fatal=False) or {}).get('errorstring') - if error: - raise ExtractorError(error, expected=True) - raise - if stream_info.get('error_code', 0) or stream_info.get('errorcode', 0): + # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706 + if item.get('contentType') == 'url': + add_format_url(item_url) + continue + + try: + stream_info = self._download_json( + item_url + '&type=json', video_id, + 'Downloading %s stream JSON' + % item.get('label') or format_id or num) + except ExtractorError as ee: + if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: + error = (self._parse_json( + ee.cause.read().decode(), video_id, + fatal=False) or {}).get('errorstring') + if 
error: + raise ExtractorError(error, expected=True) + raise + # Stream URL instead of JSON, example: npo:LI_NL1_4188102 + if isinstance(stream_info, compat_str): + if not stream_info.startswith('http'): continue + video_url = stream_info + # JSON + else: video_url = stream_info.get('url') - if not video_url: + if not video_url or video_url in urls: + continue + urls.add(item_url) + if determine_ext(video_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + else: + add_format_url(video_url) + + is_live = metadata.get('medium') == 'live' + + if not is_live: + for num, stream in enumerate(metadata.get('streams', [])): + stream_url = stream.get('url') + if not stream_url or stream_url in urls: + continue + urls.add(stream_url) + # smooth streaming is not supported + stream_type = stream.get('type', '').lower() + if stream_type in ['ss', 'ms']: continue - if stream_info.get('family') == 'adaptive': + if stream_type == 'hds': + f4m_formats = self._extract_f4m_formats( + stream_url, video_id, fatal=False) + # f4m downloader downloads only piece of live stream + for f4m_format in f4m_formats: + f4m_format['preference'] = -1 + formats.extend(f4m_formats) + elif stream_type == 'hls': formats.extend(self._extract_m3u8_formats( - video_url, video_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id='hls', - fatal=False)) - else: + stream_url, video_id, ext='mp4', fatal=False)) + # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706 + elif '.asf' in stream_url: + asx = self._download_xml( + stream_url, video_id, + 'Downloading stream %d ASX playlist' % num, + transform_source=fix_xml_ampersands, fatal=False) + if not asx: + continue + ref = asx.find('./ENTRY/Ref') + if ref is None: + continue + video_url = ref.get('href') + if not video_url or video_url in urls: + continue + urls.add(video_url) formats.append({ 'url': video_url, - 'format_id': format_id, - 'quality': quality(format_id), + 'ext': stream.get('formaat', 'asf'), + 'quality': stream.get('kwaliteit'), + 'preference': -10, }) - - streams = metadata.get('streams') - if streams: - for i, stream in enumerate(streams): - stream_url = stream.get('url') - if not stream_url: - continue - if '.asf' not in stream_url: + else: formats.append({ 'url': stream_url, 'quality': stream.get('kwaliteit'), }) - continue - asx = self._download_xml( - stream_url, video_id, - 'Downloading stream %d ASX playlist' % i, - transform_source=fix_xml_ampersands) - ref = asx.find('./ENTRY/Ref') - if ref is None: - continue - video_url = ref.get('href') - if not video_url: - continue - formats.append({ - 'url': video_url, - 'ext': stream.get('formaat', 'asf'), - 'quality': stream.get('kwaliteit'), - }) self._sort_formats(formats) @@ -250,28 +288,28 @@ class NPOIE(NPOBaseIE): return { 'id': video_id, - 'title': title, + 'title': self._live_title(title) if is_live else title, 'description': metadata.get('info'), 'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'], 'upload_date': unified_strdate(metadata.get('gidsdatum')), 'duration': parse_duration(metadata.get('tijdsduur')), 'formats': formats, 'subtitles': subtitles, + 'is_live': is_live, } class NPOLiveIE(NPOBaseIE): IE_NAME = 'npo.nl:live' - _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P.+)' + _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P[^/?#&]+)' _TEST = { 'url': 'http://www.npo.nl/live/npo-1', 'info_dict': { - 'id': 'LI_NEDERLAND1_136692', + 'id': 'LI_NL1_4188102', 
'display_id': 'npo-1', 'ext': 'mp4', - 'title': 're:^Nederland 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': 'Livestream', + 'title': 're:^NPO 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'is_live': True, }, 'params': { @@ -287,58 +325,12 @@ class NPOLiveIE(NPOBaseIE): live_id = self._search_regex( r'data-prid="([^"]+)"', webpage, 'live id') - metadata = self._download_json( - 'http://e.omroep.nl/metadata/%s' % live_id, - display_id, transform_source=strip_jsonp) - - token = self._get_token(display_id) - - formats = [] - - streams = metadata.get('streams') - if streams: - for stream in streams: - stream_type = stream.get('type').lower() - # smooth streaming is not supported - if stream_type in ['ss', 'ms']: - continue - stream_info = self._download_json( - 'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp' - % (stream.get('url'), token), - display_id, 'Downloading %s JSON' % stream_type) - if stream_info.get('error_code', 0) or stream_info.get('errorcode', 0): - continue - stream_url = self._download_json( - stream_info['stream'], display_id, - 'Downloading %s URL' % stream_type, - 'Unable to download %s URL' % stream_type, - transform_source=strip_jsonp, fatal=False) - if not stream_url: - continue - if stream_type == 'hds': - f4m_formats = self._extract_f4m_formats(stream_url, display_id) - # f4m downloader downloads only piece of live stream - for f4m_format in f4m_formats: - f4m_format['preference'] = -1 - formats.extend(f4m_formats) - elif stream_type == 'hls': - formats.extend(self._extract_m3u8_formats(stream_url, display_id, 'mp4')) - else: - formats.append({ - 'url': stream_url, - 'preference': -10, - }) - - self._sort_formats(formats) - return { + '_type': 'url_transparent', + 'url': 'npo:%s' % live_id, + 'ie_key': NPOIE.ie_key(), 'id': live_id, 'display_id': display_id, - 'title': self._live_title(metadata['titel']), - 'description': metadata['info'], - 'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'], - 'formats': formats, - 'is_live': True, } From 4b8a984c67cdc1b2bfde77398d74096406db9644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 1 Mar 2017 22:21:13 +0700 Subject: [PATCH 0408/1696] [npo] Add support for audio --- youtube_dl/extractor/npo.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index be10fc486..38fefe492 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -125,6 +125,18 @@ class NPOIE(NPOBaseIE): 'params': { 'skip_download': True, } + }, { + # audio + 'url': 'http://www.npo.nl/jouw-stad-rotterdam/29-01-2017/RBX_FUNX_6683215/RBX_FUNX_7601437', + 'info_dict': { + 'id': 'RBX_FUNX_6683215', + 'ext': 'mp3', + 'title': 'Jouw Stad Rotterdam', + 'description': 'md5:db251505244f097717ec59fabc372d9f', + }, + 'params': { + 'skip_download': True, + } }, { 'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547', 'only_matching': True, @@ -193,7 +205,7 @@ class NPOIE(NPOBaseIE): }) # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706 - if item.get('contentType') == 'url': + if item.get('contentType') in ('url', 'audio'): add_format_url(item_url) continue @@ -201,7 +213,7 @@ class NPOIE(NPOBaseIE): stream_info = self._download_json( item_url + '&type=json', video_id, 'Downloading %s stream JSON' - % item.get('label') or format_id or num) + % item.get('label') or item.get('format') or format_id or num) except ExtractorError as ee: if isinstance(ee.cause, 
compat_HTTPError) and ee.cause.code == 404: error = (self._parse_json( From 40df485f554ec3fff81ca988b5bf961d54d8e41b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 1 Mar 2017 23:03:36 +0700 Subject: [PATCH 0409/1696] [YoutubeDL] Don't sanitize identifiers (closes #12317) --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f7254560c..13a3a909e 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -616,7 +616,7 @@ class YoutubeDL(object): sanitize = lambda k, v: sanitize_filename( compat_str(v), restricted=self.params.get('restrictfilenames'), - is_id=(k == 'id')) + is_id=(k == 'id' or k.endswith('_id'))) template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v)) for k, v in template_dict.items() if v is not None and not isinstance(v, (list, tuple, dict))) From 158af5242e983312c0c1e7590faa9844136e338f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 1 Mar 2017 23:04:02 +0700 Subject: [PATCH 0410/1696] [utils] Carry long doc string --- youtube_dl/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 807183f4a..8738aa249 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -473,7 +473,8 @@ def timeconvert(timestr): def sanitize_filename(s, restricted=False, is_id=False): """Sanitizes a string so it could be used as part of a filename. If restricted is set, use a stricter subset of allowed characters. - Set is_id if this is not an arbitrary string, but an ID that should be kept if possible + Set is_id if this is not an arbitrary string, but an ID that should be kept + if possible. """ def replace_insane(char): if restricted and char in ACCENT_CHARS: From af5049f128655cfec8978f17b04e5d88bd91d37f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 2 Mar 2017 02:14:42 +0800 Subject: [PATCH 0411/1696] [adobepass] Add Charter Spectrum (#11465) Thanks @tv21 for the fix! --- youtube_dl/extractor/adobepass.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 4d655bd5e..d4816abf5 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -36,6 +36,11 @@ MSO_INFO = { 'username_field': 'Ecom_User_ID', 'password_field': 'Ecom_Password', }, + 'Charter_Direct': { + 'name': 'Charter Spectrum', + 'username_field': 'IDToken1', + 'password_field': 'IDToken2', + }, 'thr030': { 'name': '3 Rivers Communications' }, From c9612c04872656125108980dc61d71ba6b3a4f89 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 2 Mar 2017 16:59:12 +0800 Subject: [PATCH 0412/1696] [youtube] Mark errors about rental videos as expected Closes #12324 --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 81c793921..caa048249 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1454,7 +1454,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: - raise ExtractorError('"rental" videos not supported') + raise ExtractorError('"rental" videos not supported. 
See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True) # Start extracting information self.report_information_extraction(video_id) From 11bb6ad1a5ae3767535720e7dfebb823e8167088 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 2 Mar 2017 20:49:39 +0800 Subject: [PATCH 0413/1696] [facebook] Fix extraction (closes #12323) Almost all videos now use the pagelet type 'permalink_video_pagelet' --- ChangeLog | 6 ++++++ youtube_dl/extractor/facebook.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 2a1fd9018..448d7f521 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [facebook] Fix extraction (#12323) + + version 2017.02.28 Core diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 70b8c95c5..6315d40c5 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -303,7 +303,7 @@ class FacebookIE(InfoExtractor): if not video_data: server_js_data = self._parse_json( self._search_regex( - r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall)', + r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)', webpage, 'js data', default='{}'), video_id, transform_source=js_to_json, fatal=False) if server_js_data: From 28d15b73f8704c6d00efa14948da29843fdb76e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 2 Mar 2017 22:29:56 +0700 Subject: [PATCH 0414/1696] [ChangeLog] Actualize --- ChangeLog | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 448d7f521..e3e0f4544 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,14 @@ version +Core ++ [adobepass] Add support for Charter Spectrum (#11465) +* [YoutubeDL] Don't sanitize identifiers in output template (#12317) + Extractors -* [facebook] Fix extraction (#12323) +* [facebook] Fix extraction (#12323, #12330) +* [youtube] Mark errors about rental videos as expected (#12324) ++ [npo] Add support for audio +* [npo] Adapt to app.php API (#12311, #12320) version 2017.02.28 From 250eea6821a5715e2ee7cade8539fcd42177603d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 2 Mar 2017 22:33:22 +0700 Subject: [PATCH 0415/1696] release 2017.03.02 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index c907ef931..fec4152e3 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.28** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.02** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.28 +[debug] youtube-dl version 2017.03.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e3e0f4544..e53fb7767 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.03.02 Core + [adobepass] Add support for Charter Spectrum (#11465) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 340e23bf8..f4c8d3d5f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.28' +__version__ = '2017.03.02' From 4d345bf17b3040ebfedd079e656e1ca658749187 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 2 Mar 2017 23:53:46 +0700 Subject: [PATCH 0416/1696] [ruutu] Disable DASH formats (closes #12322) Due to causing out of sync issue --- youtube_dl/extractor/ruutu.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index 20d01754a..6c09df25a 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -82,6 +82,9 @@ class RuutuIE(InfoExtractor): formats.extend(self._extract_f4m_formats( video_url, video_id, f4m_id='hds', fatal=False)) elif ext == 'mpd': + # video-only and audio-only streams are of different + # duration resulting in out of sync issue + continue formats.extend(self._extract_mpd_formats( video_url, video_id, mpd_id='dash', fatal=False)) else: From 9bae185ba6be0b8ffe1d2168937c9d3274c5b60f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 3 Mar 2017 22:16:00 +0700 Subject: [PATCH 0417/1696] [24video] Use original host for requests (closes #12339) --- youtube_dl/extractor/twentyfourvideo.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index f3541b654..7af11659f 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( parse_iso8601, @@ -12,7 +14,7 @@ from ..utils import ( class TwentyFourVideoIE(InfoExtractor): IE_NAME = '24video' - _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex|tube)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P\d+)' + _VALID_URL = r'https?://(?P(?:www\.)?24video\.(?:net|me|xxx|sex|tube))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P\d+)' _TESTS = [{ 'url': 'http://www.24video.net/video/view/1044982', @@ -43,10 +45,12 @@ class TwentyFourVideoIE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + host = mobj.group('host') webpage = 
self._download_webpage( - 'http://www.24video.sex/video/view/%s' % video_id, video_id) + 'http://%s/video/view/%s' % (host, video_id), video_id) title = self._og_search_title(webpage) description = self._html_search_regex( @@ -72,11 +76,11 @@ class TwentyFourVideoIE(InfoExtractor): # Sets some cookies self._download_xml( - r'http://www.24video.sex/video/xml/%s?mode=init' % video_id, + r'http://%s/video/xml/%s?mode=init' % (host, video_id), video_id, 'Downloading init XML') video_xml = self._download_xml( - 'http://www.24video.sex/video/xml/%s?mode=play' % video_id, + 'http://%s/video/xml/%s?mode=play' % (host, video_id), video_id, 'Downloading video XML') video = xpath_element(video_xml, './/video', 'video', fatal=True) From 692fa200cae38a7e37f646118a268ad408c8ab95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 3 Mar 2017 22:28:34 +0700 Subject: [PATCH 0418/1696] [go] Relax _VALID_URL (closes #12341) --- youtube_dl/extractor/go.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 21ed846b2..4c9be47b4 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -36,7 +36,7 @@ class GoIE(AdobePassIE): 'requestor_id': 'DisneyXD', } } - _VALID_URL = r'https?://(?:(?P%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P\w+)|season-\d+/\d+-(?P[^/?#]+))' % '|'.join(_SITE_INFO.keys()) + _VALID_URL = r'https?://(?:(?P%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P\w+)|(?:[^/]+/)*(?P[^/?#]+))' % '|'.join(_SITE_INFO.keys()) _TESTS = [{ 'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', 'info_dict': { @@ -52,6 +52,12 @@ class GoIE(AdobePassIE): }, { 'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601', 'only_matching': True, + }, { + 'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding', + 'only_matching': True, + }, { + 'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland', + 'only_matching': True, }] def _real_extract(self, url): From d02d4fa0a90f3182d65504508105e8d86886c6ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 3 Mar 2017 22:49:48 +0700 Subject: [PATCH 0419/1696] [brightcove:new] Raise GeoRestrictedError --- youtube_dl/extractor/brightcove.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 27685eed0..f8605be82 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -544,8 +544,10 @@ class BrightcoveNewIE(InfoExtractor): except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: json_data = self._parse_json(e.cause.read().decode(), video_id)[0] - raise ExtractorError( - json_data.get('message') or json_data['error_code'], expected=True) + message = json_data.get('message') or json_data['error_code'] + if json_data.get('error_subcode') == 'CLIENT_GEO': + self.raise_geo_restricted(msg=message) + raise ExtractorError(message, expected=True) raise title = json_data['name'].strip() From cbb127568a6182df2c5a2d65426de523f1f7b43f Mon Sep 17 00:00:00 2001 From: Olivier Bilodeau Date: Thu, 15 Dec 2016 20:14:04 -0500 Subject: [PATCH 0420/1696] [vrak] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vrak.py | 68 ++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 
youtube_dl/extractor/vrak.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b1613a9d3..0ac42138a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1165,6 +1165,7 @@ from .voicerepublic import VoiceRepublicIE from .voxmedia import VoxMediaIE from .vporn import VpornIE from .vrt import VRTIE +from .vrak import VrakIE from .vube import VubeIE from .vuclip import VuClipIE from .vvvvid import VVVVIDIE diff --git a/youtube_dl/extractor/vrak.py b/youtube_dl/extractor/vrak.py new file mode 100644 index 000000000..692e2fcfc --- /dev/null +++ b/youtube_dl/extractor/vrak.py @@ -0,0 +1,68 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +from .brightcove import BrightcoveNewIE + + +class VrakIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?target=(?P[0-9\.]+).*' + _TEST = { + 'url': 'http://www.vrak.tv/videos?target=1.2240923&filtre=emission&id=1.1806721', + 'md5': 'c5d5ce237bca3b1e990ce1b48d1f0948', + 'info_dict': { + 'id': '5231040869001', + 'ext': 'mp4', + 'title': 'Référendums américains, animés japonais et hooligans russes', + 'upload_date': '20161201', + 'description': 'This video file has been uploaded automatically using Oprah. It should be updated with real description soon.', + 'timestamp': 1480628425, + 'uploader_id': '2890187628001', + } + } + + def _real_extract(self, url): + url_id = self._match_id(url) + webpage = self._download_webpage(url, url_id) + + result = {} + result['title'] = self._html_search_regex( + r'
(.+?)
', webpage, 'title') + + # Inspired from BrightcoveNewIE._extract_url() + entries = [] + for account_id, player_id, _, video_id in re.findall( + # account_id, player_id and embed from: + #
]+ + data-publisher-id=["\'](\d+)["\'] + [^>]* + data-player-id=["\']([^"\']+)["\'] + [^>]* + refId":"([^&]+)" + [^>]* + >.*? +
.*? + RW\ java\.lang\.String\ value\ =\ \'brightcove\.article\.\d+\.\3\' + [^>]* + RW\ java\.lang\.String\ value\ =\ \'(\d+)\' + ''', webpage): + + entries.append( + 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' + % (account_id, player_id, 'default', video_id)) + + if entries: + result = self.url_result(entries[0], BrightcoveNewIE.ie_key()) + + return result From d16f27ca272cb10f4de87814665402b9737175ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 3 Mar 2017 23:57:01 +0700 Subject: [PATCH 0421/1696] [brightcove:new] Add ability to smuggle geo_countries into URL --- youtube_dl/extractor/brightcove.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index f8605be82..66c8cb219 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -515,6 +515,9 @@ class BrightcoveNewIE(InfoExtractor): return entries def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + self._initialize_geo_bypass(smuggled_data.get('geo_countries')) + account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage( From 4d058c9862ebcb1fb7ebd988cf053fde200913cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 3 Mar 2017 23:58:16 +0700 Subject: [PATCH 0422/1696] [vrak] Improve and update test (closes #11452) --- youtube_dl/extractor/vrak.py | 104 +++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 46 deletions(-) diff --git a/youtube_dl/extractor/vrak.py b/youtube_dl/extractor/vrak.py index 692e2fcfc..daa247cce 100644 --- a/youtube_dl/extractor/vrak.py +++ b/youtube_dl/extractor/vrak.py @@ -4,65 +4,77 @@ from __future__ import unicode_literals import re from .common import InfoExtractor - from .brightcove import BrightcoveNewIE +from ..utils import ( + int_or_none, + parse_age_limit, + smuggle_url, + unescapeHTML, +) class VrakIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?target=(?P[0-9\.]+).*' + _VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?\btarget=(?P[\d.]+)' _TEST = { - 'url': 'http://www.vrak.tv/videos?target=1.2240923&filtre=emission&id=1.1806721', - 'md5': 'c5d5ce237bca3b1e990ce1b48d1f0948', + 'url': 'http://www.vrak.tv/videos?target=1.2306782&filtre=emission&id=1.1806721', 'info_dict': { - 'id': '5231040869001', + 'id': '5345661243001', 'ext': 'mp4', - 'title': 'Référendums américains, animés japonais et hooligans russes', - 'upload_date': '20161201', - 'description': 'This video file has been uploaded automatically using Oprah. It should be updated with real description soon.', - 'timestamp': 1480628425, + 'title': 'Obésité, film de hockey et Roseline Filion', + 'timestamp': 1488492126, + 'upload_date': '20170302', 'uploader_id': '2890187628001', - } + 'creator': 'VRAK.TV', + 'age_limit': 8, + 'series': 'ALT (Actualité Légèrement Tordue)', + 'episode': 'Obésité, film de hockey et Roseline Filion', + 'tags': list, + }, + 'params': { + 'skip_download': True, + }, } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/2890187628001/default_default/index.html?videoId=%s' def _real_extract(self, url): - url_id = self._match_id(url) - webpage = self._download_webpage(url, url_id) + video_id = self._match_id(url) - result = {} - result['title'] = self._html_search_regex( - r'
(.+?)
', webpage, 'title') + webpage = self._download_webpage(url, video_id) - # Inspired from BrightcoveNewIE._extract_url() - entries = [] - for account_id, player_id, _, video_id in re.findall( - # account_id, player_id and embed from: - #
]+ - data-publisher-id=["\'](\d+)["\'] - [^>]* - data-player-id=["\']([^"\']+)["\'] - [^>]* - refId":"([^&]+)" - [^>]* - >.*? -
.*? - RW\ java\.lang\.String\ value\ =\ \'brightcove\.article\.\d+\.\3\' - [^>]* - RW\ java\.lang\.String\ value\ =\ \'(\d+)\' - ''', webpage): + title = self._html_search_regex( + r']+\bclass=["\']videoTitle["\'][^>]*>([^<]+)', + webpage, 'title', default=None) or self._og_search_title(webpage) - entries.append( - 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' - % (account_id, player_id, 'default', video_id)) + content = self._parse_json( + self._search_regex( + r'data-player-options-content=(["\'])(?P{.+?})\1', + webpage, 'content', default='{}', group='content'), + video_id, transform_source=unescapeHTML) - if entries: - result = self.url_result(entries[0], BrightcoveNewIE.ie_key()) + ref_id = content.get('refId') or self._search_regex( + r'refId":"([^&]+)"', webpage, 'ref id') - return result + brightcove_id = self._search_regex( + r'''(?x) + java\.lang\.String\s+value\s*=\s*["']brightcove\.article\.\d+\.%s + [^>]* + java\.lang\.String\s+value\s*=\s*["'](\d+) + ''' % re.escape(ref_id), webpage, 'brightcove id') + + return { + '_type': 'url_transparent', + 'ie_key': BrightcoveNewIE.ie_key(), + 'url': smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + {'geo_countries': ['CA']}), + 'id': brightcove_id, + 'description': content.get('description'), + 'creator': content.get('brand'), + 'age_limit': parse_age_limit(content.get('rating')), + 'series': content.get('showName') or content.get( + 'episodeName'), # this is intentional + 'season_number': int_or_none(content.get('seasonNumber')), + 'episode': title, + 'episode_number': int_or_none(content.get('episodeNumber')), + 'tags': content.get('tags', []), + } From bc82f22879e222a1ade35fd8ebd7bb535f9166dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Mar 2017 00:42:51 +0700 Subject: [PATCH 0423/1696] [rutube] Relax _VALID_URL --- youtube_dl/extractor/rutube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index fd1df925b..663b75583 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -17,7 +17,7 @@ from ..utils import ( class RutubeIE(InfoExtractor): IE_NAME = 'rutube' IE_DESC = 'Rutube videos' - _VALID_URL = r'https?://rutube\.ru/(?:video|play/embed)/(?P[\da-z]{32})' + _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P[\da-z]{32})' _TESTS = [{ 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', @@ -39,6 +39,9 @@ class RutubeIE(InfoExtractor): }, { 'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', 'only_matching': True, + }, { + 'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661', + 'only_matching': True, }] def _real_extract(self, url): From eb3079b6ce54b63b4cc609198382b6db2cbb6f5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Mar 2017 00:46:33 +0700 Subject: [PATCH 0424/1696] [generic] Add support for rutube embeds --- youtube_dl/extractor/generic.py | 24 ++++++++++++++++++++++++ youtube_dl/extractor/rutube.py | 6 ++++++ 2 files changed, 30 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9868ca6d0..ebab9509d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -84,6 +84,7 @@ from .twentymin import TwentyMinutenIE from .ustream import UstreamIE from .openload import OpenloadIE from .videopress import VideoPressIE +from .rutube import RutubeIE class GenericIE(InfoExtractor): @@ -1502,6 +1503,23 @@ 
class GenericIE(InfoExtractor): }, 'add_ie': [VideoPressIE.ie_key()], }, + { + # Rutube embed + 'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2', + 'info_dict': { + 'id': '9b3d5bee0a8740bf70dfd29d3ea43541', + 'ext': 'flv', + 'title': 'Магаззино: Казань 2', + 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a', + 'uploader': 'Магаззино', + 'upload_date': '20170228', + 'uploader_id': '996642', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [RutubeIE.ie_key()], + }, { # ThePlatform embedded with whitespaces in URLs 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm', @@ -2480,6 +2498,12 @@ class GenericIE(InfoExtractor): return _playlist_from_matches( videopress_urls, ie=VideoPressIE.ie_key()) + # Look for Rutube embeds + rutube_urls = RutubeIE._extract_urls(webpage) + if rutube_urls: + return _playlist_from_matches( + rutube_urls, ie=RutubeIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 663b75583..889fa7628 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -44,6 +44,12 @@ class RutubeIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return [mobj.group('url') for mobj in re.finditer( + r']+?src=(["\'])(?P(?:https?:)?//rutube\.ru/embed/[\da-z]{32}.*?)\1', + webpage)] + def _real_extract(self, url): video_id = self._match_id(url) video = self._download_json( From 6f4e4132d8ef835635059d08206ca9bc6fd5dd98 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 4 Mar 2017 23:23:18 +0800 Subject: [PATCH 0425/1696] [douyutv] Switch to the PC API to escape the 5-min limitation Thanks @spacemeowx2 for the algo. Ref: https://gist.github.com/spacemeowx2/629b1d131bd7e240a7d28742048e80fc Closes #12316 --- ChangeLog | 6 ++++++ youtube_dl/extractor/douyutv.py | 31 +++++++++++++++++++++---------- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/ChangeLog b/ChangeLog index e53fb7767..13ccb0f8f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [douyutv] Switch to the PC API to escape the 5-min limitation (#12316) + + version 2017.03.02 Core diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index 9a83fb31a..82d8a042f 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -1,6 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals +import time +import hashlib + from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -16,7 +19,7 @@ class DouyuTVIE(InfoExtractor): 'info_dict': { 'id': '17732', 'display_id': 'iseven', - 'ext': 'mp4', + 'ext': 'flv', 'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': r're:.*m7show@163\.com.*', 'thumbnail': r're:^https?://.*\.jpg$', @@ -31,7 +34,7 @@ class DouyuTVIE(InfoExtractor): 'info_dict': { 'id': '85982', 'display_id': '85982', - 'ext': 'mp4', + 'ext': 'flv', 'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': 'md5:746a2f7a253966a06755a912f0acc0d2', 'thumbnail': r're:^https?://.*\.jpg$', @@ -47,7 +50,7 @@ class DouyuTVIE(InfoExtractor): 'info_dict': { 'id': '17732', 'display_id': '17732', - 'ext': 'mp4', + 'ext': 'flv', 'title': 're:^清晨醒脑!T-ARA根本停不下来! 
[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': r're:.*m7show@163\.com.*', 'thumbnail': r're:^https?://.*\.jpg$', @@ -66,10 +69,6 @@ class DouyuTVIE(InfoExtractor): 'only_matching': True, }] - # Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf - # is encrypted originally, but ffdec can dump memory to get the decrypted one. - _API_KEY = 'A12Svb&%1UUmf@hC' - def _real_extract(self, url): video_id = self._match_id(url) @@ -80,6 +79,7 @@ class DouyuTVIE(InfoExtractor): room_id = self._html_search_regex( r'"room_id\\?"\s*:\s*(\d+),', page, 'room id') + # Grab metadata from mobile API room = self._download_json( 'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id, note='Downloading room info')['data'] @@ -88,8 +88,19 @@ class DouyuTVIE(InfoExtractor): if room.get('show_status') == '2': raise ExtractorError('Live stream is offline', expected=True) - formats = self._extract_m3u8_formats( - room['hls_url'], video_id, ext='mp4') + # Grab the URL from PC client API + # The m3u8 url from mobile API requires re-authentication every 5 minutes + tt = int(time.time()) + signContent = 'lapi/live/thirdPart/getPlay/%s?aid=pcclient&rate=0&time=%d9TUk5fjjUjg9qIMH3sdnh' % (room_id, tt) + sign = hashlib.md5(signContent.encode('ascii')).hexdigest() + video_url = self._download_json( + 'http://coapi.douyucdn.cn/lapi/live/thirdPart/getPlay/' + room_id, + video_id, note='Downloading video URL info', + query={'rate': 0}, headers={ + 'auth': sign, + 'time': str(tt), + 'aid': 'pcclient' + })['data']['live_url'] title = self._live_title(unescapeHTML(room['room_name'])) description = room.get('show_details') @@ -99,7 +110,7 @@ class DouyuTVIE(InfoExtractor): return { 'id': room_id, 'display_id': video_id, - 'formats': formats, + 'url': video_url, 'title': title, 'description': description, 'thumbnail': thumbnail, From 64b7ccef3e3144a50f2cc01772a5ea5e81d4494d Mon Sep 17 00:00:00 2001 From: Juanjo Benages Date: Thu, 2 Feb 2017 17:33:09 +0100 Subject: [PATCH 0426/1696] [redbulltv] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/redbull.py | 50 ++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 youtube_dl/extractor/redbull.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0ac42138a..e42a069b5 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -793,6 +793,7 @@ from .rai import ( ) from .rbmaradio import RBMARadioIE from .rds import RDSIE +from .redbull import RedBullIE from .redtube import RedTubeIE from .regiotv import RegioTVIE from .rentv import ( diff --git a/youtube_dl/extractor/redbull.py b/youtube_dl/extractor/redbull.py new file mode 100644 index 000000000..e3d978a53 --- /dev/null +++ b/youtube_dl/extractor/redbull.py @@ -0,0 +1,50 @@ +# coding: utf-8 +from __future__ import unicode_literals +import re + +from .common import InfoExtractor + + +class RedBullIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?redbull\.tv/video/(?PAP-\w+)' + _TEST = { + 'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc', + 'md5': '78e860f631d7a846e712fab8c5fe2c38', + 'info_dict': { + 'id': 'AP-1Q756YYX51W11', + 'ext': 'mp4', + 'title': 'ABC of...WRC', + 'description': 'Buckle up for a crash course in the terminology, rules, drivers, and courses of the World Rally Championship.' 
+ } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + access_token = self._download_json( + 'http://api-v2.redbull.tv/start?build=4.0.9&category=smartphone&os_version=23&os_family=android', + video_id, note='Downloading access token', + )['auth']['access_token'] + + info = self._download_json( + 'https://api-v2.redbull.tv/views/%s' % video_id, + video_id, note='Downloading video information', + headers={'Authorization': 'Bearer ' + access_token} + )['blocks'][0]['top'][0] + + m3u8_url = info['video_product']['url'] + title = info['title'] + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls') + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': info.get('short_description'), + 'genre': info.get('genre'), + 'duration': info.get('duration') + } From 054a587de8fe2860bcb93a19f8c628b4ddd9ad56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Mar 2017 23:25:09 +0700 Subject: [PATCH 0427/1696] [redbulltv] Improve extraction (closes #11948, closes #3919) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/redbull.py | 50 -------------- youtube_dl/extractor/redbulltv.py | 106 +++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 51 deletions(-) delete mode 100644 youtube_dl/extractor/redbull.py create mode 100644 youtube_dl/extractor/redbulltv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e42a069b5..caf1dc766 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -793,7 +793,7 @@ from .rai import ( ) from .rbmaradio import RBMARadioIE from .rds import RDSIE -from .redbull import RedBullIE +from .redbulltv import RedBullTVIE from .redtube import RedTubeIE from .regiotv import RegioTVIE from .rentv import ( diff --git a/youtube_dl/extractor/redbull.py b/youtube_dl/extractor/redbull.py deleted file mode 100644 index e3d978a53..000000000 --- a/youtube_dl/extractor/redbull.py +++ /dev/null @@ -1,50 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals -import re - -from .common import InfoExtractor - - -class RedBullIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?redbull\.tv/video/(?PAP-\w+)' - _TEST = { - 'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc', - 'md5': '78e860f631d7a846e712fab8c5fe2c38', - 'info_dict': { - 'id': 'AP-1Q756YYX51W11', - 'ext': 'mp4', - 'title': 'ABC of...WRC', - 'description': 'Buckle up for a crash course in the terminology, rules, drivers, and courses of the World Rally Championship.' 
- } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - access_token = self._download_json( - 'http://api-v2.redbull.tv/start?build=4.0.9&category=smartphone&os_version=23&os_family=android', - video_id, note='Downloading access token', - )['auth']['access_token'] - - info = self._download_json( - 'https://api-v2.redbull.tv/views/%s' % video_id, - video_id, note='Downloading video information', - headers={'Authorization': 'Bearer ' + access_token} - )['blocks'][0]['top'][0] - - m3u8_url = info['video_product']['url'] - title = info['title'] - - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls') - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'description': info.get('short_description'), - 'genre': info.get('genre'), - 'duration': info.get('duration') - } diff --git a/youtube_dl/extractor/redbulltv.py b/youtube_dl/extractor/redbulltv.py new file mode 100644 index 000000000..5c73d5bca --- /dev/null +++ b/youtube_dl/extractor/redbulltv.py @@ -0,0 +1,106 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + float_or_none, + int_or_none, + try_get, + unified_timestamp, +) + + +class RedBullTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film)/(?PAP-\w+)' + _TESTS = [{ + # film + 'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc', + 'md5': '78e860f631d7a846e712fab8c5fe2c38', + 'info_dict': { + 'id': 'AP-1Q756YYX51W11', + 'ext': 'mp4', + 'title': 'ABC of...WRC', + 'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31', + 'duration': 1582.04, + 'timestamp': 1488405786, + 'upload_date': '20170301', + }, + }, { + # episode + 'url': 'https://www.redbull.tv/video/AP-1PMT5JCWH1W11/grime?playlist=shows:shows-playall:web', + 'info_dict': { + 'id': 'AP-1PMT5JCWH1W11', + 'ext': 'mp4', + 'title': 'Grime - Hashtags S2 E4', + 'description': 'md5:334b741c8c1ce65be057eab6773c1cf5', + 'duration': 904.6, + 'timestamp': 1487290093, + 'upload_date': '20170217', + 'series': 'Hashtags', + 'season_number': 2, + 'episode_number': 4, + }, + }, { + 'url': 'https://www.redbull.tv/film/AP-1MSKKF5T92111/in-motion', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + access_token = self._download_json( + 'https://api-v2.redbull.tv/start', video_id, + note='Downloading access token', query={ + 'build': '4.0.9', + 'category': 'smartphone', + 'os_version': 23, + 'os_family': 'android', + })['auth']['access_token'] + + info = self._download_json( + 'https://api-v2.redbull.tv/views/%s' % video_id, + video_id, note='Downloading video information', + headers={'Authorization': 'Bearer ' + access_token} + )['blocks'][0]['top'][0] + + video = info['video_product'] + + title = info['title'].strip() + m3u8_url = video['url'] + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + + subtitles = {} + for _, captions in (try_get( + video, lambda x: x['attachments']['captions'], + dict) or {}).items(): + if not captions or not isinstance(captions, list): + continue + for caption in captions: + caption_url = caption.get('url') + if not caption_url: + continue + subtitles.setdefault(caption.get('lang') or 'en', []).append({ + 'url': caption_url, + 'ext': caption.get('format'), + }) + + subheading = info.get('subheading') + if subheading: + title += ' - %s' % subheading + + return { + 'id': 
video_id, + 'title': title, + 'description': info.get('long_description') or info.get( + 'short_description'), + 'duration': float_or_none(video.get('duration'), scale=1000), + 'timestamp': unified_timestamp(info.get('published')), + 'series': info.get('show_title'), + 'season_number': int_or_none(info.get('season_number')), + 'episode_number': int_or_none(info.get('episode_number')), + 'formats': formats, + 'subtitles': subtitles, + } From a3ba8a7acfa2db3a8c90000d377c25d14bdad290 Mon Sep 17 00:00:00 2001 From: Lars Vierbergen Date: Sat, 4 Mar 2017 17:47:19 +0100 Subject: [PATCH 0428/1696] [vier] Add support for vijf.be vier.be and vijf.be run on the same CMS and are property of the same company, so the same extractor can be used for both of them. --- youtube_dl/extractor/vier.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index d26fb49b3..5086f591e 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -9,7 +9,7 @@ from .common import InfoExtractor class VierIE(InfoExtractor): IE_NAME = 'vier' - _VALID_URL = r'https?://(?:www\.)?vier\.be/(?:[^/]+/videos/(?P[^/]+)(?:/(?P\d+))?|video/v3/embed/(?P\d+))' + _VALID_URL = r'https?://(?:www\.)?(?Pvier|vijf)\.be/(?:[^/]+/videos/(?P[^/]+)(?:/(?P\d+))?|video/v3/embed/(?P\d+))' _TESTS = [{ 'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129', 'info_dict': { @@ -23,6 +23,19 @@ class VierIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + }, { + 'url': 'http://www.vijf.be/temptationisland/videos/zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas/2561614', + 'info_dict': { + 'id': '2561614', + 'display_id': 'zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas', + 'ext': 'mp4', + 'title': 'ZO grappig: Temptation Island hosts moeten kiezen tussen onmogelijke dilemma\'s', + 'description': 'Het spel is simpel: Annelien Coorevits en Rick Brandsteder krijgen telkens 2 dilemma\'s voorgeschoteld en ze MOETEN een keuze maken.', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen', 'only_matching': True, @@ -35,6 +48,7 @@ class VierIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) embed_id = mobj.group('embed_id') display_id = mobj.group('display_id') or embed_id + site = mobj.group('site') webpage = self._download_webpage(url, display_id) @@ -43,7 +57,7 @@ class VierIE(InfoExtractor): webpage, 'video id') application = self._search_regex( [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'], - webpage, 'application', default='vier_vod') + webpage, 'application', default=site + '_vod') filename = self._search_regex( [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'], webpage, 'filename') @@ -68,13 +82,19 @@ class VierIE(InfoExtractor): class VierVideosIE(InfoExtractor): IE_NAME = 'vier:videos' - _VALID_URL = r'https?://(?:www\.)?vier\.be/(?P[^/]+)/videos(?:\?.*\bpage=(?P\d+)|$)' + _VALID_URL = r'https?://(?:www\.)?(?Pvier|vijf)\.be/(?P[^/]+)/videos(?:\?.*\bpage=(?P\d+)|$)' _TESTS = [{ 'url': 'http://www.vier.be/demoestuin/videos', 'info_dict': { 'id': 'demoestuin', }, 'playlist_mincount': 153, + }, { + 'url': 'http://www.vijf.be/temptationisland/videos', + 'info_dict': { + 'id': 'temptationisland', + }, + 'playlist_mincount': 159, }, { 'url': 'http://www.vier.be/demoestuin/videos?page=6', 'info_dict': { @@ -92,6 +112,7 @@ 
class VierVideosIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) program = mobj.group('program') + site = mobj.group('site') page_id = mobj.group('page') if page_id: @@ -105,13 +126,13 @@ class VierVideosIE(InfoExtractor): entries = [] for current_page_id in itertools.count(start_page): current_page = self._download_webpage( - 'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id), + 'http://www.%s.be/%s/videos?page=%d' % (site, program, current_page_id), program, 'Downloading page %d' % (current_page_id + 1)) page_entries = [ - self.url_result('http://www.vier.be' + video_url, 'Vier') + self.url_result('http://www.' + site + '.be' + video_url, 'Vier') for video_url in re.findall( - r'

', current_page)] + r'', current_page)] entries.extend(page_entries) if page_id or '>Meer<' not in current_page: break From 30f8f142d4563df9cf47b0164adbb2c9e0130c5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Mar 2017 23:57:03 +0700 Subject: [PATCH 0429/1696] Credit @ThomasChr for #12015 and #12245 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 247c0ea13..3d8db1ee2 100644 --- a/AUTHORS +++ b/AUTHORS @@ -202,3 +202,4 @@ Fabian Stahl Bagira Odd Stråbø Philip Herzog +Thomas Christlieb From 466274fe9a8fae0a5f5e0358f48e54f569c10c5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Mar 2017 23:58:12 +0700 Subject: [PATCH 0430/1696] Credit @p2004a for vodpl (#12122) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 3d8db1ee2..2a0938f3b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -203,3 +203,4 @@ Bagira Odd Stråbø Philip Herzog Thomas Christlieb +Marek Rusinowski From f24c1e558456021d9a8704c9964c6a704e8b73ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Mar 2017 23:59:49 +0700 Subject: [PATCH 0431/1696] Credit @TobiX for #9725 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 2a0938f3b..0716d2ad6 100644 --- a/AUTHORS +++ b/AUTHORS @@ -204,3 +204,4 @@ Odd Stråbø Philip Herzog Thomas Christlieb Marek Rusinowski +Tobias Gruetzmacher From 6f211dc936dcd1f8ab6f178f8696f5edf2f385a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 00:01:17 +0700 Subject: [PATCH 0432/1696] Credit @obilodeau for vrak (#11452) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 0716d2ad6..4f383e214 100644 --- a/AUTHORS +++ b/AUTHORS @@ -205,3 +205,4 @@ Philip Herzog Thomas Christlieb Marek Rusinowski Tobias Gruetzmacher +Olivier Bilodeau From bcefc59279d9d4d7034e0f25a3fcf561a456766d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 00:02:30 +0700 Subject: [PATCH 0433/1696] Credit @vierbergenlars for vijf.be (#12304) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 4f383e214..fd1b8c9ea 100644 --- a/AUTHORS +++ b/AUTHORS @@ -206,3 +206,4 @@ Thomas Christlieb Marek Rusinowski Tobias Gruetzmacher Olivier Bilodeau +Lars Vierbergen From c64c03be35adae05740058e449c205120f89910d Mon Sep 17 00:00:00 2001 From: Xiao Di Guan Date: Thu, 16 Feb 2017 17:46:54 +1100 Subject: [PATCH 0434/1696] [twitch] Add basic support for two-factor authentication --- youtube_dl/extractor/twitch.py | 81 ++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index bbba394b0..096a2ac9d 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -12,7 +12,6 @@ from ..compat import ( compat_str, compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, - compat_urlparse, ) from ..utils import ( clean_html, @@ -24,6 +23,7 @@ from ..utils import ( parse_iso8601, update_url_query, urlencode_postdata, + urljoin, ) @@ -32,7 +32,7 @@ class TwitchBaseIE(InfoExtractor): _API_BASE = 'https://api.twitch.tv' _USHER_BASE = 'https://usher.ttvnw.net' - _LOGIN_URL = 'http://www.twitch.tv/login' + _LOGIN_URL = 'https://www.twitch.tv/login' _CLIENT_ID = 'jzkbprff40iqj646a697cyrvl0zt2m6' _NETRC_MACHINE = 'twitch' @@ -64,6 +64,36 @@ class TwitchBaseIE(InfoExtractor): raise ExtractorError( 'Unable to login. 
Twitch said: %s' % message, expected=True) + def post_login_form(page, urlh, note, data): + form = self._hidden_inputs(page) + form.update(data) + + page_url = urlh.geturl() + post_url = self._search_regex( + r']+action=(["\'])(?P.+?)\1', page, + 'post url', default=page_url, group='url') + post_url = urljoin(page_url, post_url) + + headers = {'Referer': page_url} + + try: + response = self._download_json( + post_url, None, note, + data=urlencode_postdata(form), + headers=headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + response = self._parse_json( + e.cause.read().decode('utf-8'), None) + fail(response['message']) + raise + + if response.get('redirect'): + redirect_url = urljoin(post_url, response['redirect']) + return self._download_webpage_handle( + redirect_url, None, 'Downloading login redirect page', + headers=headers) + login_page, handle = self._download_webpage_handle( self._LOGIN_URL, None, 'Downloading login page') @@ -71,40 +101,25 @@ class TwitchBaseIE(InfoExtractor): if 'blacklist_message' in login_page: fail(clean_html(login_page)) - login_form = self._hidden_inputs(login_page) - - login_form.update({ + login_data = { 'username': username, 'password': password, - }) - - redirect_url = handle.geturl() - - post_url = self._search_regex( - r']+action=(["\'])(?P.+?)\1', login_page, - 'post url', default=redirect_url, group='url') - - if not post_url.startswith('http'): - post_url = compat_urlparse.urljoin(redirect_url, post_url) - - headers = {'Referer': redirect_url} + } + redirect_res = post_login_form( + login_page, handle, 'Logging in as %s' % username, login_data) - try: - response = self._download_json( - post_url, None, 'Logging in as %s' % username, - data=urlencode_postdata(login_form), - headers=headers) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: - response = self._parse_json( - e.cause.read().decode('utf-8'), None) - fail(response['message']) - raise - - if response.get('redirect'): - self._download_webpage( - response['redirect'], None, 'Downloading login redirect page', - headers=headers) + if not redirect_res: + return + redirect_page, handle = redirect_res + + if re.search(r'(?i)]+id="two-factor-submit"', redirect_page) is not None: + # TODO: Add mechanism to request an SMS or phone call + tfa_token = self._get_tfa_info('two-factor authentication token') + tfa_data = { + 'authy_token': tfa_token, + 'remember_2fa': 'true', + } + post_login_form(redirect_page, handle, 'Submitting TFA token', tfa_data) def _prefer_source(self, formats): try: From 5316566edcbb1a2ac2e0559a1863b2204242b7d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 02:06:33 +0700 Subject: [PATCH 0435/1696] [twitch] Use better naming and simplify (closes #11974) --- youtube_dl/extractor/twitch.py | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 096a2ac9d..ed36336bd 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -64,7 +64,7 @@ class TwitchBaseIE(InfoExtractor): raise ExtractorError( 'Unable to login. 
Twitch said: %s' % message, expected=True) - def post_login_form(page, urlh, note, data): + def login_step(page, urlh, note, data): form = self._hidden_inputs(page) form.update(data) @@ -88,11 +88,10 @@ class TwitchBaseIE(InfoExtractor): fail(response['message']) raise - if response.get('redirect'): - redirect_url = urljoin(post_url, response['redirect']) - return self._download_webpage_handle( - redirect_url, None, 'Downloading login redirect page', - headers=headers) + redirect_url = urljoin(post_url, response['redirect']) + return self._download_webpage_handle( + redirect_url, None, 'Downloading login redirect page', + headers=headers) login_page, handle = self._download_webpage_handle( self._LOGIN_URL, None, 'Downloading login page') @@ -101,25 +100,19 @@ class TwitchBaseIE(InfoExtractor): if 'blacklist_message' in login_page: fail(clean_html(login_page)) - login_data = { - 'username': username, - 'password': password, - } - redirect_res = post_login_form( - login_page, handle, 'Logging in as %s' % username, login_data) - - if not redirect_res: - return - redirect_page, handle = redirect_res + redirect_page, handle = login_step( + login_page, handle, 'Logging in as %s' % username, { + 'username': username, + 'password': password, + }) if re.search(r'(?i)]+id="two-factor-submit"', redirect_page) is not None: # TODO: Add mechanism to request an SMS or phone call tfa_token = self._get_tfa_info('two-factor authentication token') - tfa_data = { + login_step(redirect_page, handle, 'Submitting TFA token', { 'authy_token': tfa_token, 'remember_2fa': 'true', - } - post_login_form(redirect_page, handle, 'Submitting TFA token', tfa_data) + }) def _prefer_source(self, formats): try: From 75027364ba35d8852f393f67860be817fff05541 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 02:22:02 +0700 Subject: [PATCH 0436/1696] [ChangeLog] Actualize --- ChangeLog | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ChangeLog b/ChangeLog index 13ccb0f8f..7e22db53e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,18 @@ version Extractors ++ [twitch] Add basic support for two-factor authentication (#11974) ++ [vier] Add support for vijf.be (#12304) ++ [redbulltv] Add support for redbull.tv (#3919, #11948) * [douyutv] Switch to the PC API to escape the 5-min limitation (#12316) ++ [generic] Add support for rutube embeds ++ [rutube] Relax URL regular expression ++ [vrak] Add support for vrak.tv (#11452) ++ [brightcove:new] Add ability to smuggle geo_countries into URL ++ [brightcove:new] Raise GeoRestrictedError +* [go] Relax URL regular expression (#12341) +* [24video] Use original host for requests (#12339) +* [ruutu] Disable DASH formats (#12322) version 2017.03.02 From afa4597618462df01b47febcd64c531f8ffdd63a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 02:23:08 +0700 Subject: [PATCH 0437/1696] release 2017.03.05 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index fec4152e3..988d0d81b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. 
Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.02** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.05*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.05** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.03.02 +[debug] youtube-dl version 2017.03.05 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 7e22db53e..2f3ec1b47 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.03.05 Extractors + [twitch] Add basic support for two-factor authentication (#11974) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a08e00fce..f02c4dea0 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -626,6 +626,7 @@ - **RaiTV** - **RBMARadio** - **RDS**: RDS.ca + - **RedBullTV** - **RedTube** - **RegioTV** - **RENTV** @@ -916,6 +917,7 @@ - **VoxMedia** - **Vporn** - **vpro**: npo.nl and ntr.nl + - **Vrak** - **VRT** - **vube**: Vube.com - **VuClip** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index f4c8d3d5f..215f9a3c1 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.03.02' +__version__ = '2017.03.05' From 6d0fe752bfeaa9a758099315f006bc15acd1ae76 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Sat, 4 Mar 2017 22:19:44 -0500 Subject: [PATCH 0438/1696] [external:ffmpeg] In test harness, limit to 10k download size Otherwise, if you screw up a playlist test by including a playlist dictionary key, you'll be there for eons while it downloads all the files before erroring out. 
--- youtube_dl/downloader/external.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index bdd3545a2..127a92d20 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -270,6 +270,10 @@ class FFmpegFD(ExternalFD): args += ['-rtmp_live', 'live'] args += ['-i', url, '-c', 'copy'] + + if self.params.get('test', False): + args += ['-fs', compat_str(self._TEST_FILE_SIZE)] # -fs limit_size (output), expressed in bytes + if protocol in ('m3u8', 'm3u8_native'): if self.params.get('hls_use_mpegts', False) or tmpfilename == '-': args += ['-f', 'mpegts'] From a50862b7355544d3fd8473bf3ff35e9c6643d789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 10:24:29 +0700 Subject: [PATCH 0439/1696] [downloader/external] Add missing import and PEP8 --- youtube_dl/downloader/external.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 127a92d20..e13cf547d 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -6,7 +6,10 @@ import sys import re from .common import FileDownloader -from ..compat import compat_setenv +from ..compat import ( + compat_setenv, + compat_str, +) from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS from ..utils import ( cli_option, @@ -272,7 +275,7 @@ class FFmpegFD(ExternalFD): args += ['-i', url, '-c', 'copy'] if self.params.get('test', False): - args += ['-fs', compat_str(self._TEST_FILE_SIZE)] # -fs limit_size (output), expressed in bytes + args += ['-fs', compat_str(self._TEST_FILE_SIZE)] if protocol in ('m3u8', 'm3u8_native'): if self.params.get('hls_use_mpegts', False) or tmpfilename == '-': From ed0cf9b38394b28bae5f05fd6b00c85a9c0e6755 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 23:22:27 +0700 Subject: [PATCH 0440/1696] [extractor/common] Move jwplayer formats extraction in separate method --- youtube_dl/extractor/common.py | 106 +++++++++++++++++---------------- 1 file changed, 56 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index eb3c091aa..9b73a948c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2198,56 +2198,7 @@ class InfoExtractor(object): this_video_id = video_id or video_data['mediaid'] - formats = [] - for source in video_data['sources']: - source_url = self._proto_relative_url(source['file']) - if base_url: - source_url = compat_urlparse.urljoin(base_url, source_url) - source_type = source.get('type') or '' - ext = mimetype2ext(source_type) or determine_ext(source_url) - if source_type == 'hls' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) - elif ext == 'mpd': - formats.extend(self._extract_mpd_formats( - source_url, this_video_id, mpd_id=mpd_id, fatal=False)) - # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 - elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): - formats.append({ - 'url': source_url, - 'vcodec': 'none', - 'ext': ext, - }) - else: - height = int_or_none(source.get('height')) - if height is None: - # Often no height is provided but there is a label in - # format like 1080p. 
- height = int_or_none(self._search_regex( - r'^(\d{3,})[pP]$', source.get('label') or '', - 'height', default=None)) - a_format = { - 'url': source_url, - 'width': int_or_none(source.get('width')), - 'height': height, - 'ext': ext, - } - if source_url.startswith('rtmp'): - a_format['ext'] = 'flv' - - # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as - # of jwplayer.flash.swf - rtmp_url_parts = re.split( - r'((?:mp4|mp3|flv):)', source_url, 1) - if len(rtmp_url_parts) == 3: - rtmp_url, prefix, play_path = rtmp_url_parts - a_format.update({ - 'url': rtmp_url, - 'play_path': prefix + play_path, - }) - if rtmp_params: - a_format.update(rtmp_params) - formats.append(a_format) + formats = self._parse_jwplayer_formats(video_data['sources'], this_video_id) self._sort_formats(formats) subtitles = {} @@ -2278,6 +2229,61 @@ class InfoExtractor(object): else: return self.playlist_result(entries) + def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, + m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): + formats = [] + for source in jwplayer_sources_data : + source_url = self._proto_relative_url(source['file']) + if base_url: + source_url = compat_urlparse.urljoin(base_url, source_url) + source_type = source.get('type') or '' + ext = mimetype2ext(source_type) or determine_ext(source_url) + if source_type == 'hls' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + source_url, video_id, mpd_id=mpd_id, fatal=False)) + # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 + elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): + formats.append({ + 'url': source_url, + 'vcodec': 'none', + 'ext': ext, + }) + else: + height = int_or_none(source.get('height')) + if height is None: + # Often no height is provided but there is a label in + # format like 1080p. 
+ height = int_or_none(self._search_regex( + r'^(\d{3,})[pP]$', source.get('label') or '', + 'height', default=None)) + a_format = { + 'url': source_url, + 'width': int_or_none(source.get('width')), + 'height': height, + 'ext': ext, + } + if source_url.startswith('rtmp'): + a_format['ext'] = 'flv' + + # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as + # of jwplayer.flash.swf + rtmp_url_parts = re.split( + r'((?:mp4|mp3|flv):)', source_url, 1) + if len(rtmp_url_parts) == 3: + rtmp_url, prefix, play_path = rtmp_url_parts + a_format.update({ + 'url': rtmp_url, + 'play_path': prefix + play_path, + }) + if rtmp_params: + a_format.update(rtmp_params) + formats.append(a_format) + return formats + + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() From 0236cd0dfde1dda540c1067a9c5982d482005c47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 23:25:03 +0700 Subject: [PATCH 0441/1696] [extractor/common] Improve height extraction and extract bitrate --- youtube_dl/extractor/common.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9b73a948c..2887db0c3 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2240,12 +2240,14 @@ class InfoExtractor(object): ext = mimetype2ext(source_type) or determine_ext(source_url) if source_type == 'hls' or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) + source_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id=m3u8_id, fatal=False)) elif ext == 'mpd': formats.extend(self._extract_mpd_formats( source_url, video_id, mpd_id=mpd_id, fatal=False)) # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 - elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): + elif source_type.startswith('audio') or ext in ( + 'oga', 'aac', 'mp3', 'mpeg', 'vorbis'): formats.append({ 'url': source_url, 'vcodec': 'none', @@ -2255,19 +2257,19 @@ class InfoExtractor(object): height = int_or_none(source.get('height')) if height is None: # Often no height is provided but there is a label in - # format like 1080p. + # format like "1080p", "720p SD", or 1080. 
height = int_or_none(self._search_regex( - r'^(\d{3,})[pP]$', source.get('label') or '', + r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''), 'height', default=None)) a_format = { 'url': source_url, 'width': int_or_none(source.get('width')), 'height': height, + 'tbr': int_or_none(source.get('bitrate')), 'ext': ext, } if source_url.startswith('rtmp'): a_format['ext'] = 'flv' - # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as # of jwplayer.flash.swf rtmp_url_parts = re.split( From 1a2192cb904ff42a309ab2c2477fc226f8651f33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 23:28:32 +0700 Subject: [PATCH 0442/1696] [extractor/common] Pass arguments to _parse_jwplayer_formats and PEP8 --- youtube_dl/extractor/common.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2887db0c3..78dc5be24 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2198,7 +2198,9 @@ class InfoExtractor(object): this_video_id = video_id or video_data['mediaid'] - formats = self._parse_jwplayer_formats(video_data['sources'], this_video_id) + formats = self._parse_jwplayer_formats( + video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id, + mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url) self._sort_formats(formats) subtitles = {} @@ -2232,7 +2234,7 @@ class InfoExtractor(object): def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): formats = [] - for source in jwplayer_sources_data : + for source in jwplayer_sources_data: source_url = self._proto_relative_url(source['file']) if base_url: source_url = compat_urlparse.urljoin(base_url, source_url) @@ -2285,7 +2287,6 @@ class InfoExtractor(object): formats.append(a_format) return formats - def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() From 5dd376345b8c87e8c2130b80e73e690c5e721a28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Mar 2017 23:31:38 +0700 Subject: [PATCH 0443/1696] [tunepk] Add extractor (closes #12197, closes #12243) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tunepk.py | 90 ++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 youtube_dl/extractor/tunepk.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index caf1dc766..b056dff53 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1000,6 +1000,7 @@ from .tunein import ( TuneInTopicIE, TuneInShortenerIE, ) +from .tunepk import TunePkIE from .turbo import TurboIE from .tutv import TutvIE from .tv2 import ( diff --git a/youtube_dl/extractor/tunepk.py b/youtube_dl/extractor/tunepk.py new file mode 100644 index 000000000..9d42651ce --- /dev/null +++ b/youtube_dl/extractor/tunepk.py @@ -0,0 +1,90 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + try_get, + unified_timestamp, +) + + +class TunePkIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + (?:www\.)?tune\.pk/(?:video/|player/embed_player.php?.*?\bvid=)| + embed\.tune\.pk/play/ + ) + (?P\d+) + ''' + _TESTS = [{ + 'url': 'https://tune.pk/video/6919541/maudie-2017-international-trailer-1-ft-ethan-hawke-sally-hawkins', + 'md5': 
'0c537163b7f6f97da3c5dd1e3ef6dd55', + 'info_dict': { + 'id': '6919541', + 'ext': 'mp4', + 'title': 'Maudie (2017) | International Trailer # 1 ft Ethan Hawke, Sally Hawkins', + 'description': 'md5:eb5a04114fafef5cec90799a93a2d09c', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1487327564, + 'upload_date': '20170217', + 'uploader': 'Movie Trailers', + 'duration': 107, + 'view_count': int, + } + }, { + 'url': 'https://tune.pk/player/embed_player.php?vid=6919541&folder=2017/02/17/&width=600&height=350&autoplay=no', + 'only_matching': True, + }, { + 'url': 'https://embed.tune.pk/play/6919541?autoplay=no&ssl=yes&inline=true', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + 'https://tune.pk/video/%s' % video_id, video_id) + + details = self._parse_json( + self._search_regex( + r'new\s+TunePlayer\(({.+?})\)\s*;\s*\n', webpage, 'tune player'), + video_id)['details'] + + video = details['video'] + title = video.get('title') or self._og_search_title( + webpage, default=None) or self._html_search_meta( + 'title', webpage, 'title', fatal=True) + + formats = self._parse_jwplayer_formats( + details['player']['sources'], video_id) + self._sort_formats(formats) + + description = self._og_search_description( + webpage, default=None) or self._html_search_meta( + 'description', webpage, 'description') + + thumbnail = video.get('thumb') or self._og_search_thumbnail( + webpage, default=None) or self._html_search_meta( + 'thumbnail', webpage, 'thumbnail') + + timestamp = unified_timestamp(video.get('date_added')) + uploader = try_get( + video, lambda x: x['uploader']['name'], + compat_str) or self._html_search_meta('author', webpage, 'author') + + duration = int_or_none(video.get('duration')) + view_count = int_or_none(video.get('views')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'uploader': uploader, + 'duration': duration, + 'view_count': view_count, + 'formats': formats, + } From d2b64e04b475b6ef7e5fb74d92ae9a35284fc35a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 6 Mar 2017 00:32:53 +0800 Subject: [PATCH 0444/1696] [addanime] Skip an invalid test --- youtube_dl/extractor/addanime.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index 55a9322a7..9f8a71262 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -25,7 +25,8 @@ class AddAnimeIE(InfoExtractor): 'ext': 'mp4', 'description': 'One Piece 606', 'title': 'One Piece 606', - } + }, + 'skip': 'Video is gone', }, { 'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687', 'only_matching': True, From fc11ad383311d721179483eea2e289e3a236e457 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 Mar 2017 03:21:03 +0700 Subject: [PATCH 0445/1696] [drtv:live] Bypass geo restriction --- youtube_dl/extractor/drtv.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index e966d7483..b879f2c2b 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -156,6 +156,7 @@ class DRTVIE(InfoExtractor): class DRTVLiveIE(InfoExtractor): IE_NAME = 'drtv:live' _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P[\da-z-]+)' + _GEO_COUNTRIES = ['DK'] _TEST = { 'url': 'https://www.dr.dk/tv/live/dr1', 'info_dict': { From 
96182695e4e37795a30ab143129c91dab18a9865 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 Mar 2017 03:23:01 +0700 Subject: [PATCH 0446/1696] [drtv] Add geo countries to GeoRestrictedError --- youtube_dl/extractor/drtv.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index b879f2c2b..e4917014a 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -15,6 +15,8 @@ from ..utils import ( class DRTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' + _GEO_BYPASS = False + _GEO_COUNTRIES = ['DK'] IE_NAME = 'drtv' _TESTS = [{ 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10', @@ -137,7 +139,7 @@ class DRTVIE(InfoExtractor): if not formats and restricted_to_denmark: self.raise_geo_restricted( 'Unfortunately, DR is not allowed to show this program outside Denmark.', - expected=True) + countries=self._GEO_COUNTRIES) self._sort_formats(formats) From 4b5de77bdb7765df4797bf068592926285ba709a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 Mar 2017 03:57:46 +0700 Subject: [PATCH 0447/1696] [utils] Process bytestrings in urljoin (closes #12369) --- test/test_utils.py | 3 +++ youtube_dl/utils.py | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index aefd94518..173c49514 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -455,6 +455,9 @@ class TestUtil(unittest.TestCase): def test_urljoin(self): self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin(b'http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin(b'http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 8738aa249..d293c7498 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1748,11 +1748,16 @@ def base_url(url): def urljoin(base, path): + if isinstance(path, bytes): + path = path.decode('utf-8') if not isinstance(path, compat_str) or not path: return None if re.match(r'^(?:https?:)?//', path): return path - if not isinstance(base, compat_str) or not re.match(r'^(?:https?:)?//', base): + if isinstance(base, bytes): + base = base.decode('utf-8') + if not isinstance(base, compat_str) or not re.match( + r'^(?:https?:)?//', base): return None return compat_urlparse.urljoin(base, path) From 3f116b189bb990529a1a18ba7a3829b1592cfecd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 Mar 2017 04:01:21 +0700 Subject: [PATCH 0448/1696] [ChangeLog] Actualize --- ChangeLog | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ChangeLog b/ChangeLog index 2f3ec1b47..c1f43a625 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +version + +Core ++ [utils] Process bytestrings in urljoin (#12369) +* [extractor/common] Improve height extraction and extract bitrate +* [extractor/common] Move jwplayer formats extraction in separate method ++ [external:ffmpeg] Limit test download size to 10KiB (#12362) 
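The urljoin change recorded above amounts to decoding bytes arguments before joining; its new unit tests boil down to the following (paths and values taken from the patch's test_utils.py additions):

    from youtube_dl.utils import urljoin

    # Both arguments may now be bytes or text, mixed freely.
    assert urljoin(b'http://foo.de/', '/a/b/c.txt') == 'http://foo.de/a/b/c.txt'
    assert urljoin('http://foo.de/', b'/a/b/c.txt') == 'http://foo.de/a/b/c.txt'
    assert urljoin(b'http://foo.de/', b'/a/b/c.txt') == 'http://foo.de/a/b/c.txt'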
+ +Extractors ++ [drtv] Add geo countries to GeoRestrictedError ++ [drtv:live] Bypass geo restriction ++ [tunepk] Add extractor (#12197, #12243) + + version 2017.03.05 Extractors From 16647026264b35a40ecd56f5d9392d0643a2066c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 Mar 2017 04:04:39 +0700 Subject: [PATCH 0449/1696] release 2017.03.06 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 988d0d81b..a36d7322d 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.05*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.05** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.06*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.06** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.03.05 +[debug] youtube-dl version 2017.03.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index c1f43a625..648e04856 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.03.06 Core + [utils] Process bytestrings in urljoin (#12369) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f02c4dea0..85c59ca81 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -798,6 +798,7 @@ - **tunein:program** - **tunein:station** - **tunein:topic** + - **TunePk** - **Turbo** - **Tutv** - **tv.dfb.de** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 215f9a3c1..71a984913 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.03.05' +__version__ = '2017.03.06' From da92da4b886a0e44fe28591ddf5b746fba1c9ade Mon Sep 17 00:00:00 2001 From: denneboomyo Date: Mon, 6 Mar 2017 11:00:17 +0100 Subject: [PATCH 0450/1696] Openload fix extraction (#12357) * Fix extraction --- youtube_dl/extractor/openload.py | 47 +++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index fc7ff43a6..25f6a9aca 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -75,22 +75,37 @@ class OpenloadIE(InfoExtractor): 
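The hunk that follows replaces the previous decoder with an XOR scheme: a 12-character hex slice of the page id serves as a 6-byte key for the remaining hex pairs. A condensed standalone paraphrase (a sketch under that reading of the diff, not code from the patch; the function name is invented):

    def decode_openload_id(ol_id):
        # Position of the embedded key is derived from the first character
        key_pos = min(max(2, ord(ol_id[0]) - 55), len(ol_id) - 14)
        key_hex = ol_id[key_pos:key_pos + 12]
        key = [int(key_hex[i:i + 2], 16) for i in range(0, 12, 2)]
        # Remaining hex pairs are XORed against the key, cycling every 6 bytes
        rest = ol_id.replace(key_hex, '')
        return ''.join(
            chr(int(rest[i:i + 2], 16) ^ key[(i // 2) % 6])
            for i in range(0, len(rest), 2))

    # video_url = 'https://openload.co/stream/%s?mime=true' % decode_openload_id(ol_id)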
']+id="[^"]+"[^>]*>([0-9A-Za-z]+)', webpage, 'openload ID') - first_char = int(ol_id[0]) - urlcode = [] - num = 1 - - while num < len(ol_id): - i = ord(ol_id[num]) - key = 0 - if i <= 90: - key = i - 65 - elif i >= 97: - key = 25 + i - 97 - urlcode.append((key, compat_chr(int(ol_id[num + 2:num + 5]) // int(ol_id[num + 1]) - first_char))) - num += 5 - - video_url = 'https://openload.co/stream/' + ''.join( - [value for _, value in sorted(urlcode, key=lambda x: x[0])]) + video_url_chars = [] + + first_char = ord(ol_id[0]) + key = first_char - 55 + maxKey = max(2, key) + key = min(maxKey, len(ol_id) - 14) + t = ol_id[key:key + 12] + + hashMap = {} + v = ol_id.replace(t, "") + h = 0 + + while h < len(t): + f = t[h:h + 2] + i = int(f, 16) + hashMap[h / 2] = i + h += 2 + + h = 0 + + while h < len(v): + B = v[h:h + 2] + i = int(B, 16) + index = (h / 2) % 6 + A = hashMap[index] + i = i ^ A + video_url_chars.append(compat_chr(i)) + h += 2 + + video_url = 'https://openload.co/stream/%s?mime=true' + video_url = video_url % (''.join(video_url_chars)) title = self._og_search_title(webpage, default=None) or self._search_regex( r']+class=["\']title["\'][^>]*>([^<]+)', webpage, From 92cb5763f440d0ca1627f8120a7ce29598eb9484 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 6 Mar 2017 18:04:19 +0800 Subject: [PATCH 0451/1696] [ChangeLog] Update after #12357 --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index 648e04856..5fb4c20af 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [openload] Fix extraction (#10408, #12357) + + version 2017.03.06 Core From 54a3a8827baf71c553723b6766e676369e9c743c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 9 Jan 2017 02:36:39 +0800 Subject: [PATCH 0452/1696] [__init__] Metadata should be added after conversion Fixes #5594 --- ChangeLog | 4 ++++ youtube_dl/__init__.py | 8 +++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5fb4c20af..ad65505c5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ version +Core +* [__init__] Metadata are now added after conversion (#5594) + + Extractors * [openload] Fix extraction (#10408, #12357) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 0c401baa6..ad5f13d2b 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -242,14 +242,11 @@ def _real_main(argv=None): # PostProcessors postprocessors = [] - # Add the metadata pp first, the other pps will copy it if opts.metafromtitle: postprocessors.append({ 'key': 'MetadataFromTitle', 'titleformat': opts.metafromtitle }) - if opts.addmetadata: - postprocessors.append({'key': 'FFmpegMetadata'}) if opts.extractaudio: postprocessors.append({ 'key': 'FFmpegExtractAudio', @@ -279,6 +276,11 @@ def _real_main(argv=None): }) if not already_have_thumbnail: opts.writethumbnail = True + # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and + # FFmpegExtractAudioPP as containers before conversion may not support + # metadata (3gp, webm, etc.) 
+ if opts.addmetadata: + postprocessors.append({'key': 'FFmpegMetadata'}) # XAttrMetadataPP should be run after post-processors that may change file # contents if opts.xattrs: From e30ccf7047eb3b8dff8b778790f9b084e6d7f42e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 6 Mar 2017 23:05:38 +0700 Subject: [PATCH 0453/1696] [soundcloud] Update client id (closes #12376) --- youtube_dl/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index b3aa4ce26..0ee4a8ff8 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -121,7 +121,7 @@ class SoundcloudIE(InfoExtractor): }, ] - _CLIENT_ID = 'fDoItMDbsbZz8dY16ZzARCZmzgHBPotA' + _CLIENT_ID = '2t9loNQH90kzJcsFCODdigxfp325aq4z' _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' @staticmethod From 80146dcc6c27b46fb8340d3285d95f2f7674fb0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 7 Mar 2017 03:57:54 +0700 Subject: [PATCH 0454/1696] [ChangeLog] Actualize --- ChangeLog | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index ad65505c5..622086440 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,10 +1,10 @@ version Core -* [__init__] Metadata are now added after conversion (#5594) - +* Metadata are now added after conversion (#5594) Extractors +* [soundcloud] Update client id (#12376) * [openload] Fix extraction (#10408, #12357) From dccd0ab35d1acc45e36241c505b5325d96ca501d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 7 Mar 2017 03:59:22 +0700 Subject: [PATCH 0455/1696] release 2017.03.07 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a36d7322d..eb69696c8 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.06*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.06** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.07** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.03.06 +[debug] youtube-dl version 2017.03.07 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 622086440..601aad902 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.03.07 Core * Metadata are now added after conversion (#5594) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 71a984913..bd451bf81 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.03.06' +__version__ = '2017.03.07' From d7d7f84c9565d682119d081324c26eb361e05aa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 7 Mar 2017 04:03:52 +0700 Subject: [PATCH 0456/1696] Credit @benages for redbull.tv (#11948) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index fd1b8c9ea..74abda016 100644 --- a/AUTHORS +++ b/AUTHORS @@ -207,3 +207,4 @@ Marek Rusinowski Tobias Gruetzmacher Olivier Bilodeau Lars Vierbergen +Juanjo Benages From 9df53ea36ec84c6ab5a4a672d120f7850e2363ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 7 Mar 2017 04:04:49 +0700 Subject: [PATCH 0457/1696] Credit @puxlit for twitch 2fa (#11974) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 74abda016..273a6a034 100644 --- a/AUTHORS +++ b/AUTHORS @@ -208,3 +208,4 @@ Tobias Gruetzmacher Olivier Bilodeau Lars Vierbergen Juanjo Benages +Xiao Di Guan From fe646a2f106fef0651158c460dd766c236e2f5db Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 7 Mar 2017 15:34:06 +0800 Subject: [PATCH 0458/1696] [twitch] PEP8 --- youtube_dl/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index ed36336bd..2daf9dfac 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -104,7 +104,7 @@ class TwitchBaseIE(InfoExtractor): login_page, handle, 'Logging in as %s' % username, { 'username': username, 'password': password, - }) + }) if re.search(r'(?i)]+id="two-factor-submit"', redirect_page) is not None: # TODO: Add mechanism to request an SMS or phone call From 2e76bdc850ed5d5ffe95578b576b8fb66dcea8f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 7 Mar 2017 22:59:33 +0700 Subject: [PATCH 0459/1696] [brightcove:legacy] Relax videoPlayer validation check (closes #12381) --- youtube_dl/extractor/brightcove.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 66c8cb219..46ef8e605 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -193,7 +193,13 @@ class BrightcoveLegacyIE(InfoExtractor): if videoPlayer is 
not None: if isinstance(videoPlayer, list): videoPlayer = videoPlayer[0] - if not (videoPlayer.isdigit() or videoPlayer.startswith('ref:')): + videoPlayer = videoPlayer.strip() + # UUID is also possible for videoPlayer (e.g. + # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd + # or http://www8.hp.com/cn/zh/home.html) + if not (re.match( + r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$', + videoPlayer) or videoPlayer.startswith('ref:')): return None params['@videoPlayer'] = videoPlayer linkBase = find_param('linkBaseURL') From b68a812ea839e44148516a34a15193189e58ba77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 7 Mar 2017 23:00:21 +0700 Subject: [PATCH 0460/1696] [extractor/generic] Add test for brigthcove UUID-like videoPlayer --- youtube_dl/extractor/generic.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ebab9509d..bc22421ae 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -449,6 +449,23 @@ class GenericIE(InfoExtractor): }, }], }, + { + # Brightcove with UUID in videoPlayer + 'url': 'http://www8.hp.com/cn/zh/home.html', + 'info_dict': { + 'id': '5255815316001', + 'ext': 'mp4', + 'title': 'Sprocket Video - China', + 'description': 'Sprocket Video - China', + 'uploader': 'HP-Video Gallery', + 'timestamp': 1482263210, + 'upload_date': '20161220', + 'uploader_id': '1107601872001', + }, + 'params': { + 'skip_download': True, # m3u8 download + }, + }, # ooyala video { 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', From b08cc749d6e1a3078d807580c424437bca143b73 Mon Sep 17 00:00:00 2001 From: denneboomyo Date: Tue, 7 Mar 2017 23:01:27 +0100 Subject: [PATCH 0461/1696] [openload] Fix extraction --- youtube_dl/extractor/openload.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 25f6a9aca..5a5607357 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -78,10 +78,10 @@ class OpenloadIE(InfoExtractor): video_url_chars = [] first_char = ord(ol_id[0]) - key = first_char - 55 + key = first_char - 50 maxKey = max(2, key) - key = min(maxKey, len(ol_id) - 14) - t = ol_id[key:key + 12] + key = min(maxKey, len(ol_id) - 22) + t = ol_id[key:key + 20] hashMap = {} v = ol_id.replace(t, "") @@ -98,8 +98,9 @@ class OpenloadIE(InfoExtractor): while h < len(v): B = v[h:h + 2] i = int(B, 16) - index = (h / 2) % 6 + index = (h / 2) % 10 A = hashMap[index] + i = i ^ 137 i = i ^ A video_url_chars.append(compat_chr(i)) h += 2 From d7344d33b11cc10d1a668c1ab9a65f2a34b4000f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 8 Mar 2017 18:25:04 +0700 Subject: [PATCH 0462/1696] [telequebec] Fix description extraction and update test (closes #12399) --- youtube_dl/extractor/telequebec.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py index 82d73c31d..fafaa826f 100644 --- a/youtube_dl/extractor/telequebec.py +++ b/youtube_dl/extractor/telequebec.py @@ -2,15 +2,17 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( int_or_none, smuggle_url, + try_get, ) class 
TeleQuebecIE(InfoExtractor): _VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://zonevideo.telequebec.tv/media/20984/le-couronnement-de-new-york/couronnement-de-new-york', 'md5': 'fe95a0957e5707b1b01f5013e725c90f', 'info_dict': { @@ -18,10 +20,14 @@ class TeleQuebecIE(InfoExtractor): 'ext': 'mp4', 'title': 'Le couronnement de New York', 'description': 'md5:f5b3d27a689ec6c1486132b2d687d432', - 'upload_date': '20160220', - 'timestamp': 1455965438, + 'upload_date': '20170201', + 'timestamp': 1485972222, } - } + }, { + # no description + 'url': 'http://zonevideo.telequebec.tv/media/30261', + 'only_matching': True, + }] def _real_extract(self, url): media_id = self._match_id(url) @@ -31,9 +37,13 @@ class TeleQuebecIE(InfoExtractor): return { '_type': 'url_transparent', 'id': media_id, - 'url': smuggle_url('limelight:media:' + media_data['streamInfo']['sourceId'], {'geo_countries': ['CA']}), + 'url': smuggle_url( + 'limelight:media:' + media_data['streamInfo']['sourceId'], + {'geo_countries': ['CA']}), 'title': media_data['title'], - 'description': media_data.get('descriptions', [{'text': None}])[0].get('text'), - 'duration': int_or_none(media_data.get('durationInMilliseconds'), 1000), + 'description': try_get( + media_data, lambda x: x['descriptions'][0]['text'], compat_str), + 'duration': int_or_none( + media_data.get('durationInMilliseconds'), 1000), 'ie_key': 'LimelightMedia', } From 0f6b87d067f33b5e9b8acf2f8a1f2afa9546439e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 8 Mar 2017 19:46:58 +0800 Subject: [PATCH 0463/1696] [miomio] Fix extraction Closes #12291 Closes #12388 Closes #12402 --- ChangeLog | 6 ++++++ youtube_dl/extractor/miomio.py | 14 ++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 601aad902..b000e2e94 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [miomio] Fix extraction (#12291, #12388, #12402) + + version 2017.03.07 Core diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py index ec1b4c4fe..40f72d66f 100644 --- a/youtube_dl/extractor/miomio.py +++ b/youtube_dl/extractor/miomio.py @@ -51,6 +51,7 @@ class MioMioIE(InfoExtractor): 'ext': 'mp4', 'title': 'マツコの知らない世界【劇的進化SP!ビニール傘&冷凍食品2016】 1_2 - 16 05 31', }, + 'skip': 'Unable to load videos', }] def _extract_mioplayer(self, webpage, video_id, title, http_headers): @@ -94,9 +95,18 @@ class MioMioIE(InfoExtractor): return entries + def _download_chinese_webpage(self, *args, **kwargs): + # Requests with English locales return garbage + headers = { + 'Accept-Language': 'zh-TW,en-US;q=0.7,en;q=0.3', + } + kwargs.setdefault('headers', {}).update(headers) + return self._download_webpage(*args, **kwargs) + def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_chinese_webpage( + url, video_id) title = self._html_search_meta( 'description', webpage, 'title', fatal=True) @@ -106,7 +116,7 @@ class MioMioIE(InfoExtractor): if '_h5' in mioplayer_path: player_url = compat_urlparse.urljoin(url, mioplayer_path) - player_webpage = self._download_webpage( + player_webpage = self._download_chinese_webpage( player_url, video_id, note='Downloading player webpage', headers={'Referer': url}) entries = self._parse_html5_media_entries(player_url, player_webpage, video_id) From 0cf2352e858982ed811122cf867fb5e25694d97a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 8 Mar 
2017 21:19:08 +0700 Subject: [PATCH 0464/1696] [dplayit] Separate and rewrite extractor and bypass geo restriction (closes #12393) --- youtube_dl/extractor/dplay.py | 117 +++++++++++++++++++++++------ youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 100 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 32028bc3b..62e676389 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -6,37 +6,24 @@ import re import time from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_urlparse, + compat_HTTPError, +) from ..utils import ( USER_AGENTS, + ExtractorError, int_or_none, + unified_strdate, + remove_end, update_url_query, ) class DPlayIE(InfoExtractor): - _VALID_URL = r'https?://(?Pit\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P[^/?#]+)' + _VALID_URL = r'https?://(?Pwww\.dplay\.(?:dk|se|no))/[^/]+/(?P[^/?#]+)' _TESTS = [{ - # geo restricted, via direct unsigned hls URL - 'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/', - 'info_dict': { - 'id': '1255600', - 'display_id': 'stagione-1-episodio-25', - 'ext': 'mp4', - 'title': 'Episodio 25', - 'description': 'md5:cae5f40ad988811b197d2d27a53227eb', - 'duration': 2761, - 'timestamp': 1454701800, - 'upload_date': '20160205', - 'creator': 'RTIT', - 'series': 'Take me out', - 'season_number': 1, - 'episode_number': 25, - 'age_limit': 0, - }, - 'expected_warnings': ['Unable to download f4m manifest'], - }, { # non geo restricted, via secure api, unsigned download hls URL 'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/', 'info_dict': { @@ -168,3 +155,91 @@ class DPlayIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, } + + +class DPlayItIE(InfoExtractor): + _VALID_URL = r'https?://it\.dplay\.com/[^/]+/[^/]+/(?P[^/?#]+)' + _GEO_COUNTRIES = ['IT'] + _TEST = { + 'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/', + 'md5': '2b808ffb00fc47b884a172ca5d13053c', + 'info_dict': { + 'id': '6918', + 'display_id': 'luigi-di-maio-la-psicosi-di-stanislawskij', + 'ext': 'mp4', + 'title': 'Biografie imbarazzanti: Luigi Di Maio: la psicosi di Stanislawskij', + 'description': 'md5:3c7a4303aef85868f867a26f5cc14813', + 'thumbnail': r're:^https?://.*\.jpe?g', + 'upload_date': '20160524', + 'series': 'Biografie imbarazzanti', + 'season_number': 1, + 'episode': 'Luigi Di Maio: la psicosi di Stanislawskij', + 'episode_number': 1, + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + r'url\s*:\s*["\']https://dplay-south-prod\.disco-api\.com/playback/videoPlaybackInfo/(\d+)', + webpage, 'video id') + + title = remove_end(self._og_search_title(webpage), ' | Dplay') + + try: + info = self._download_json( + 'https://dplay-south-prod.disco-api.com/playback/videoPlaybackInfo/%s' % video_id, + display_id, headers={ + 'Authorization': 'Bearer %s' % self._get_cookies(url).get( + 'dplayit_token').value, + 'Referer': url, + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403): + info = self._parse_json(e.cause.read().decode('utf-8'), display_id) + error = info['errors'][0] + if error.get('code') == 'access.denied.geoblocked': + self.raise_geo_restricted( + msg=error.get('detail'), countries=self._GEO_COUNTRIES) + raise 
ExtractorError(info['errors'][0]['detail'], expected=True) + raise + + hls_url = info['data']['attributes']['streaming']['hls']['url'] + + formats = self._extract_m3u8_formats( + hls_url, display_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + + series = self._html_search_regex( + r'(?s)]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)
</h1>
', + webpage, 'series', fatal=False) + episode = self._search_regex( + r']+class=["\'].*?\bdesc_ep\b.*?["\'][^>]*>\s*
\s*([^<]+)', + webpage, 'episode', fatal=False) + + mobj = re.search( + r'(?s)]+class=["\']dates["\'][^>]*>.+?\bS\.(?P\d+)\s+E\.(?P\d+)\s*-\s*(?P\d{2}/\d{2}/\d{4})', + webpage) + if mobj: + season_number = int(mobj.group('season_number')) + episode_number = int(mobj.group('episode_number')) + upload_date = unified_strdate(mobj.group('upload_date')) + else: + season_number = episode_number = upload_date = None + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'series': series, + 'season_number': season_number, + 'episode': episode, + 'episode_number': episode_number, + 'upload_date': upload_date, + 'formats': formats, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b056dff53..bd2762e47 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -246,7 +246,10 @@ from .dfb import DFBIE from .dhm import DHMIE from .dotsub import DotsubIE from .douyutv import DouyuTVIE -from .dplay import DPlayIE +from .dplay import ( + DPlayIE, + DPlayItIE, +) from .dramafever import ( DramaFeverIE, DramaFeverSeriesIE, From 0e7f9a9b48700efd40c4068b00364a7963dc9265 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 8 Mar 2017 21:30:30 +0700 Subject: [PATCH 0465/1696] [dplayit] Relax playback info URL extraction --- youtube_dl/extractor/dplay.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 62e676389..87c5dd63e 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -183,16 +183,15 @@ class DPlayItIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - video_id = self._search_regex( - r'url\s*:\s*["\']https://dplay-south-prod\.disco-api\.com/playback/videoPlaybackInfo/(\d+)', + info_url = self._search_regex( + r'url\s*:\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)', webpage, 'video id') title = remove_end(self._og_search_title(webpage), ' | Dplay') try: info = self._download_json( - 'https://dplay-south-prod.disco-api.com/playback/videoPlaybackInfo/%s' % video_id, - display_id, headers={ + info_url, display_id, headers={ 'Authorization': 'Bearer %s' % self._get_cookies(url).get( 'dplayit_token').value, 'Referer': url, @@ -231,7 +230,7 @@ class DPlayItIE(InfoExtractor): season_number = episode_number = upload_date = None return { - 'id': video_id, + 'id': info_url.rpartition('/')[-1], 'display_id': display_id, 'title': title, 'description': self._og_search_description(webpage), From 2913821723c826a0d2bfc16427592bf2b9d6d31c Mon Sep 17 00:00:00 2001 From: Thomas Christlieb Date: Thu, 9 Mar 2017 17:18:37 +0100 Subject: [PATCH 0466/1696] [prosiebensat1] Improve title extraction (closes #12318) --- youtube_dl/extractor/prosiebensat1.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 1245309a7..0cda992b4 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -369,7 +369,9 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): def _extract_clip(self, url, webpage): clip_id = self._html_search_regex( self._CLIPID_REGEXES, webpage, 'clip id') - title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title') + title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title', default=None) + 
if title is None: + title = self._og_search_title(webpage) info = self._extract_video_info(url, clip_id) description = self._html_search_regex( self._DESCRIPTION_REGEXES, webpage, 'description', default=None) From 76bee08fe72f154ad3754428eb80413ad0a3aa19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 9 Mar 2017 23:42:07 +0700 Subject: [PATCH 0467/1696] [prosiebensat1] Improve title extraction and add test --- youtube_dl/extractor/prosiebensat1.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 0cda992b4..d8a4bd244 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -300,6 +300,21 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'skip_download': True, }, }, + { + # title in
<h2 class="subtitle">
+ 'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip', + 'info_dict': { + 'id': '4895826', + 'ext': 'mp4', + 'title': 'Jetzt erst enthüllt: Das Geheimnis von Emma Stones Oscar-Robe', + 'description': 'md5:e5ace2bc43fadf7b63adc6187e9450b9', + 'upload_date': '20170302', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'geo restricted to Germany', + }, { # geo restricted to Germany 'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge', @@ -338,6 +353,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): r'
<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
         r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>',
         r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>',
+        r'<h2[^>]+class="subtitle"[^>]*>([^<]+)</h2>',
     ]
     _DESCRIPTION_REGEXES = [
         r'<p itemprop="description">\s*(.+?)</p>
', @@ -369,9 +385,9 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): def _extract_clip(self, url, webpage): clip_id = self._html_search_regex( self._CLIPID_REGEXES, webpage, 'clip id') - title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title', default=None) - if title is None: - title = self._og_search_title(webpage) + title = self._html_search_regex( + self._TITLE_REGEXES, webpage, 'title', + default=None) or self._og_search_title(webpage) info = self._extract_video_info(url, clip_id) description = self._html_search_regex( self._DESCRIPTION_REGEXES, webpage, 'description', default=None) From f802c4866017017afa642ca9ea1ea87edad081b4 Mon Sep 17 00:00:00 2001 From: runningbits Date: Fri, 10 Mar 2017 16:59:32 +0100 Subject: [PATCH 0468/1696] [wdr:maus] Fix extraction and update tests --- youtube_dl/extractor/wdr.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index f7e6360a3..110999827 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -19,9 +19,9 @@ class WDRBaseIE(InfoExtractor): def _extract_wdr_video(self, webpage, display_id): # for wdr.de the data-extension is in a tag with the class "mediaLink" # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn" - # for wdrmaus its in a link to the page in a multiline "videoLink"-tag + # for wdrmaus it is in a link to the page in a multiline "videoLink"-tag json_metadata = self._html_search_regex( - r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', + r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', webpage, 'media link', default=None, flags=re.MULTILINE) if not json_metadata: @@ -161,23 +161,23 @@ class WDRIE(WDRBaseIE): { 'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5', 'info_dict': { - 'id': 'mdb-1096487', - 'ext': 'flv', + 'id': 'mdb-1323501', + 'ext': 'mp4', 'upload_date': 're:^[0-9]{8}$', 'title': 're:^Die Sendung mit der Maus vom [0-9.]{10}$', - 'description': '- Die Sendung mit der Maus -', + 'description': 'Die Seite mit der Maus -', }, 'skip': 'The id changes from week to week because of the new episode' }, { - 'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5', + 'url': 'http://www.wdrmaus.de/filme/sachgeschichten/achterbahn.php5', 'md5': '803138901f6368ee497b4d195bb164f2', 'info_dict': { 'id': 'mdb-186083', 'ext': 'mp4', 'upload_date': '20130919', 'title': 'Sachgeschichte - Achterbahn ', - 'description': '- Die Sendung mit der Maus -', + 'description': 'Die Seite mit der Maus -', }, }, { @@ -186,7 +186,7 @@ class WDRIE(WDRBaseIE): 'info_dict': { 'id': 'mdb-869971', 'ext': 'flv', - 'title': 'Funkhaus Europa Livestream', + 'title': 'COSMO Livestream', 'description': 'md5:2309992a6716c347891c045be50992e4', 'upload_date': '20160101', }, From bd34c32bd754f30dd34b2d43604de73681b7148b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 10 Mar 2017 23:07:36 +0700 Subject: [PATCH 0469/1696] [wdr] Actualize comment --- youtube_dl/extractor/wdr.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 110999827..8bb7362bb 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -19,7 +19,8 @@ class WDRBaseIE(InfoExtractor): def _extract_wdr_video(self, webpage, display_id): # for wdr.de the 
data-extension is in a tag with the class "mediaLink" # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn" - # for wdrmaus it is in a link to the page in a multiline "videoLink"-tag + # for wdrmaus, in a tag with the class "videoButton" (previously a link + # to the page in a multiline "videoLink"-tag) json_metadata = self._html_search_regex( r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', webpage, 'media link', default=None, flags=re.MULTILINE) @@ -32,7 +33,7 @@ class WDRBaseIE(InfoExtractor): jsonp_url = media_link_obj['mediaObj']['url'] metadata = self._download_json( - jsonp_url, 'metadata', transform_source=strip_jsonp) + jsonp_url, display_id, transform_source=strip_jsonp) metadata_tracker_data = metadata['trackerData'] metadata_media_resource = metadata['mediaResource'] From c2ee861c6d9697d9b46fdad9719b087b87cfb481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 10 Mar 2017 23:16:53 +0700 Subject: [PATCH 0470/1696] [extractor/generic] Make title optional for jwplayer embeds (closes #12410) --- youtube_dl/extractor/generic.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index bc22421ae..ad47c3b6b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2550,7 +2550,10 @@ class GenericIE(InfoExtractor): try: jwplayer_data = self._parse_json( jwplayer_data_str, video_id, transform_source=js_to_json) - return self._parse_jwplayer_data(jwplayer_data, video_id) + info = self._parse_jwplayer_data( + jwplayer_data, video_id, require_title=False) + if not info.get('title'): + info['title'] = video_title except ExtractorError: pass From f9e5c92c945bbc2c3c361f0e40f001d8f97e6b64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 10 Mar 2017 23:23:24 +0700 Subject: [PATCH 0471/1696] [ChangeLog] Actualize --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index b000e2e94..e731e1998 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,14 @@ version Extractors +* [generic] Make title optional for jwplayer embeds (#12410) +* [wdr:maus] Fix extraction (#12373) +* [prosiebensat1] Improve title extraction (#12318, #12327) +* [dplayit] Separate and rewrite extractor and bypass geo restriction (#12393) * [miomio] Fix extraction (#12291, #12388, #12402) +* [telequebec] Fix description extraction (#12399) +* [openload] Fix extraction (#12357) +* [brightcove:legacy] Relax videoPlayer validation check (#12381) version 2017.03.07 From a8e687a4dac412ab66d63e7da93e032cfe440c05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 10 Mar 2017 23:26:28 +0700 Subject: [PATCH 0472/1696] release 2017.03.10 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index eb69696c8..76e09c42a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.07** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.10** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.03.07 +[debug] youtube-dl version 2017.03.10 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e731e1998..848eba76b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.03.10 Extractors * [generic] Make title optional for jwplayer embeds (#12410) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 85c59ca81..09dc830cb 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -212,6 +212,7 @@ - **Dotsub** - **DouyuTV**: 斗鱼 - **DPlay** + - **DPlayIt** - **dramafever** - **dramafever:series** - **DRBonanza** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index bd451bf81..d74046b37 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.03.07' +__version__ = '2017.03.10' From 4605c94d1a386a71f170dc46d491c4ef78828753 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 11 Mar 2017 19:37:45 +0800 Subject: [PATCH 0473/1696] [__init__] Fix missing subtitles if --add-metadata is used (#12423) The previous fix for #5594 is incorrect --- ChangeLog | 6 ++++++ youtube_dl/__init__.py | 15 ++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index 848eba76b..b1425e630 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Core +* Fix missing subtitles if --add-metadata is used (#12423) + + version 2017.03.10 Extractors diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index ad5f13d2b..c482f9375 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -259,6 +259,16 @@ def _real_main(argv=None): 'key': 'FFmpegVideoConvertor', 'preferedformat': opts.recodevideo, }) + # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and + # FFmpegExtractAudioPP as containers before conversion may not support + # metadata (3gp, webm, etc.) + # And this post-processor should be placed before other metadata + # manipulating post-processors (FFmpegEmbedSubtitle) to prevent loss of + # extra metadata. By default ffmpeg preserves metadata applicable for both + # source and target containers. From this point the container won't change, + # so metadata can be added here. 
+ if opts.addmetadata: + postprocessors.append({'key': 'FFmpegMetadata'}) if opts.convertsubtitles: postprocessors.append({ 'key': 'FFmpegSubtitlesConvertor', @@ -276,11 +286,6 @@ def _real_main(argv=None): }) if not already_have_thumbnail: opts.writethumbnail = True - # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and - # FFmpegExtractAudioPP as containers before conversion may not support - # metadata (3gp, webm, etc.) - if opts.addmetadata: - postprocessors.append({'key': 'FFmpegMetadata'}) # XAttrMetadataPP should be run after post-processors that may change file # contents if opts.xattrs: From 70497994707b6bcbd6c0ecc3fb1073be4d75d970 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 11 Mar 2017 23:16:51 +0700 Subject: [PATCH 0474/1696] [discoverygo:playlist] Add extractor (closes #12424) --- youtube_dl/extractor/discoverygo.py | 59 +++++++++++++++++++++++++++-- youtube_dl/extractor/extractors.py | 5 ++- 2 files changed, 59 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py index 2042493a8..d14de8f00 100644 --- a/youtube_dl/extractor/discoverygo.py +++ b/youtube_dl/extractor/discoverygo.py @@ -1,17 +1,21 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_str from ..utils import ( extract_attributes, + ExtractorError, int_or_none, parse_age_limit, - ExtractorError, + remove_end, + unescapeHTML, ) -class DiscoveryGoIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(?:www\.)?(?: +class DiscoveryGoBaseIE(InfoExtractor): + _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?: discovery| investigationdiscovery| discoverylife| @@ -21,7 +25,11 @@ class DiscoveryGoIE(InfoExtractor): sciencechannel| tlc| velocitychannel - )go\.com/(?:[^/]+/)*(?P[^/?#&]+)''' + )go\.com/%s(?P[^/?#&]+)''' + + +class DiscoveryGoIE(DiscoveryGoBaseIE): + _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+' _TEST = { 'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/', 'info_dict': { @@ -113,3 +121,46 @@ class DiscoveryGoIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, } + + +class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE): + _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % '' + _TEST = { + 'url': 'https://www.discoverygo.com/bering-sea-gold/', + 'info_dict': { + 'id': 'bering-sea-gold', + 'title': 'Bering Sea Gold', + 'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e', + }, + 'playlist_mincount': 6, + } + + @classmethod + def suitable(cls, url): + return False if DiscoveryGoIE.suitable(url) else super( + DiscoveryGoPlaylistIE, cls).suitable(url) + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + entries = [] + for mobj in re.finditer(r'data-json=(["\'])(?P{.+?})\1', webpage): + data = self._parse_json( + mobj.group('json'), display_id, + transform_source=unescapeHTML, fatal=False) + if not isinstance(data, dict) or data.get('type') != 'episode': + continue + episode_url = data.get('socialUrl') + if not episode_url: + continue + entries.append(self.url_result( + episode_url, ie=DiscoveryGoIE.ie_key(), + video_id=data.get('id'))) + + return self.playlist_result( + entries, display_id, + remove_end(self._og_search_title( + webpage, fatal=False), ' | Discovery GO'), + self._og_search_description(webpage)) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 
bd2762e47..24c478932 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -265,7 +265,10 @@ from .dvtv import DVTVIE from .dumpert import DumpertIE from .defense import DefenseGouvFrIE from .discovery import DiscoveryIE -from .discoverygo import DiscoveryGoIE +from .discoverygo import ( + DiscoveryGoIE, + DiscoveryGoPlaylistIE, +) from .disney import DisneyIE from .dispeak import DigitallySpeakingIE from .dropbox import DropboxIE From a28f8d739683577ea632b1523eee418ed5177b73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 11 Mar 2017 23:18:42 +0700 Subject: [PATCH 0475/1696] [discoverygo] Bypass geo restriction --- youtube_dl/extractor/discoverygo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py index d14de8f00..de94c4b09 100644 --- a/youtube_dl/extractor/discoverygo.py +++ b/youtube_dl/extractor/discoverygo.py @@ -30,6 +30,7 @@ class DiscoveryGoBaseIE(InfoExtractor): class DiscoveryGoIE(DiscoveryGoBaseIE): _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+' + _GEO_COUNTRIES = ['US'] _TEST = { 'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/', 'info_dict': { From 57b0ddb35fd541f084f56e192fb46b4f61476664 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 11 Mar 2017 23:21:08 +0700 Subject: [PATCH 0476/1696] [discoverygo] Actualize test --- youtube_dl/extractor/discoverygo.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py index de94c4b09..7cd5d4291 100644 --- a/youtube_dl/extractor/discoverygo.py +++ b/youtube_dl/extractor/discoverygo.py @@ -32,16 +32,16 @@ class DiscoveryGoIE(DiscoveryGoBaseIE): _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+' _GEO_COUNTRIES = ['US'] _TEST = { - 'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/', + 'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/', 'info_dict': { - 'id': '57a33c536b66d1cd0345eeb1', + 'id': '58c167d86b66d12f2addeb01', 'ext': 'mp4', - 'title': 'Kiss First, Ask Questions Later!', - 'description': 'md5:fe923ba34050eae468bffae10831cb22', - 'duration': 2579, - 'series': 'Love at First Kiss', - 'season_number': 1, - 'episode_number': 1, + 'title': 'Reaper Madness', + 'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78', + 'duration': 2519, + 'series': 'Bering Sea Gold', + 'season_number': 8, + 'episode_number': 6, 'age_limit': 14, }, } From 8c996232591848fe9c0c0585fd9958db4431c513 Mon Sep 17 00:00:00 2001 From: Starsam80 Date: Sat, 11 Mar 2017 21:18:10 -0700 Subject: [PATCH 0477/1696] [crunchyroll] Extract season name --- youtube_dl/extractor/crunchyroll.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 9c6cf00ca..d15fd3744 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -177,6 +177,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): 'uploader': 'Kadokawa Pictures Inc.', 'upload_date': '20170118', 'series': "KONOSUBA -God's blessing on this wonderful world!", + 'season': "KONOSUBA -God's blessing on this wonderful world! 
2", 'season_number': 2, 'episode': 'Give Me Deliverance from this Judicial Injustice!', 'episode_number': 1, @@ -222,6 +223,23 @@ class CrunchyrollIE(CrunchyrollBaseIE): # just test metadata extraction 'skip_download': True, }, + }, { + # A video with a vastly different season name compared to the series name + 'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532', + 'info_dict': { + 'id': '590532', + 'ext': 'mp4', + 'title': 'Haiyoru! Nyaruani (ONA) Episode 1 – Test', + 'description': 'Mahiro and Nyaruko talk about official certification.', + 'uploader': 'TV TOKYO', + 'upload_date': '20120305', + 'series': 'Nyarko-san: Another Crawling Chaos', + 'season': 'Haiyoru! Nyaruani (ONA)', + }, + 'params': { + # Just test metadata extraction + 'skip_download': True, + }, }] _FORMAT_IDS = { @@ -491,7 +509,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text # webpage provide more accurate data than series_title from XML series = self._html_search_regex( r'id=["\']showmedia_about_episode_num[^>]+>\s*]+>([^<]+)', - webpage, 'series', default=xpath_text(metadata, 'series_title')) + webpage, 'series', fatal=False) + season = xpath_text(metadata, 'series_title') episode = xpath_text(metadata, 'episode_title') episode_number = int_or_none(xpath_text(metadata, 'episode_number')) @@ -508,6 +527,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'uploader': video_uploader, 'upload_date': video_upload_date, 'series': series, + 'season': season, 'season_number': season_number, 'episode': episode, 'episode_number': episode_number, From c1795ca6c8b1351a563a3e91023e46d18d59d52b Mon Sep 17 00:00:00 2001 From: Lucas M Date: Sun, 12 Mar 2017 11:51:59 -0700 Subject: [PATCH 0478/1696] [streamable] Update API URL --- youtube_dl/extractor/streamable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/streamable.py b/youtube_dl/extractor/streamable.py index e973c867c..9f5c237ef 100644 --- a/youtube_dl/extractor/streamable.py +++ b/youtube_dl/extractor/streamable.py @@ -65,7 +65,7 @@ class StreamableIE(InfoExtractor): # to return video info like the title properly sometimes, and doesn't # include info like the video duration video = self._download_json( - 'https://streamable.com/ajax/videos/%s' % video_id, video_id) + 'https://ajax.streamable.com/videos/%s' % video_id, video_id) # Format IDs: # 0 The video is being uploaded From ff9d509d200577a0be962ee47894cd257c7ef818 Mon Sep 17 00:00:00 2001 From: Vijay Singh Date: Mon, 13 Mar 2017 01:52:35 +0530 Subject: [PATCH 0479/1696] [openload] Fix extraction Just a minor fix for openload --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 5a5607357..9a42ab895 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -100,7 +100,7 @@ class OpenloadIE(InfoExtractor): i = int(B, 16) index = (h / 2) % 10 A = hashMap[index] - i = i ^ 137 + i = i ^ 96 i = i ^ A video_url_chars.append(compat_chr(i)) h += 2 From e313d209c25bcf8adf3c888516624f5c9b2f2eaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 13 Mar 2017 22:39:15 +0700 Subject: [PATCH 0480/1696] [mitele] Add support for ooyala videos (closes #12430) --- youtube_dl/extractor/mitele.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 
79e0b8ada..28b743cca 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import uuid from .common import InfoExtractor +from .ooyala import OoyalaIE from ..compat import ( compat_str, compat_urllib_parse_urlencode, @@ -24,6 +25,9 @@ class MiTeleBaseIE(InfoExtractor): r'(?s)()', webpage, 'ms video player')) video_id = player_data['data-media-id'] + if player_data.get('data-cms-id') == 'ooyala': + return self.url_result( + 'ooyala:%s' % video_id, ie=OoyalaIE.ie_key(), video_id=video_id) config_url = compat_urlparse.urljoin(url, player_data['data-config']) config = self._download_json( config_url, video_id, 'Downloading config JSON') From 9d089630229e9d921da255fc6d3f671d307a0848 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 13 Mar 2017 22:41:28 +0700 Subject: [PATCH 0481/1696] [telecinco] Add test for #12430 --- youtube_dl/extractor/telecinco.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py index d5abfc9e4..fdcc7d573 100644 --- a/youtube_dl/extractor/telecinco.py +++ b/youtube_dl/extractor/telecinco.py @@ -44,6 +44,10 @@ class TelecincoIE(MiTeleBaseIE): }, { 'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html', 'only_matching': True, + }, { + # ooyala video + 'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html', + 'only_matching': True, }] def _real_extract(self, url): From 66bf351f8052fb71dce20c3a5ba1aa507532222e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Mar 2017 00:37:39 +0700 Subject: [PATCH 0482/1696] [facebook] Make title optional (closes #12443) --- youtube_dl/extractor/facebook.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 6315d40c5..b69c1ede0 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -196,6 +196,10 @@ class FacebookIE(InfoExtractor): }, { 'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670', 'only_matching': True, + }, { + # no title + 'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/', + 'only_matching': True, }] @staticmethod @@ -353,15 +357,15 @@ class FacebookIE(InfoExtractor): self._sort_formats(formats) video_title = self._html_search_regex( - r']*class="uiHeaderTitle"[^>]*>([^<]*)
</h2>
', webpage, 'title', - default=None) + r']*class="uiHeaderTitle"[^>]*>([^<]*)', webpage, + 'title', default=None) if not video_title: video_title = self._html_search_regex( r'(?s)(.*?)', webpage, 'alternative title', default=None) if not video_title: video_title = self._html_search_meta( - 'description', webpage, 'title') + 'description', webpage, 'title', default=None) if video_title: video_title = limit_length(video_title, 80) else: From 398887b4c09b3691379720314f4918bc094d1b7b Mon Sep 17 00:00:00 2001 From: Vijay Singh Date: Tue, 14 Mar 2017 05:19:18 +0530 Subject: [PATCH 0483/1696] [Openload] Fixed Extraction They did changed it again. --- youtube_dl/extractor/openload.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 9a42ab895..5ea749f35 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -96,14 +96,16 @@ class OpenloadIE(InfoExtractor): h = 0 while h < len(v): - B = v[h:h + 2] + B = v[h:h + 3] i = int(B, 16) - index = (h / 2) % 10 + if (h / 3) % 3 == 0: + i = int(B, 8) + index = (h / 3) % 10 A = hashMap[index] - i = i ^ 96 + i = i ^ 47 i = i ^ A video_url_chars.append(compat_chr(i)) - h += 2 + h += 3 video_url = 'https://openload.co/stream/%s?mime=true' video_url = video_url % (''.join(video_url_chars)) From 2a751e137ffecf616f04f036bd89c87e967647bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 15 Mar 2017 01:58:59 +0700 Subject: [PATCH 0484/1696] [ChangeLog] Actualize --- ChangeLog | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ChangeLog b/ChangeLog index b1425e630..eaf1f7dbd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,15 @@ version Core * Fix missing subtitles if --add-metadata is used (#12423) +Extractors +* [facebook] Make title optional (#12443) ++ [mitele] Add support for ooyala videos (#12430) +* [openload] Fix extraction (#12435, #12446) +* [streamable] Update API URL (#12433) ++ [crunchyroll] Extract season name (#12428) +* [discoverygo] Bypass geo restriction ++ [discoverygo:playlist] Add support for playlists (#12424) + version 2017.03.10 From 5db83d79bfe192f8a7c80dd44fb2089f114a4189 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 15 Mar 2017 02:01:24 +0700 Subject: [PATCH 0485/1696] release 2017.03.15 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 76e09c42a..cd8592775 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.10** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.15** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.03.10 +[debug] youtube-dl version 2017.03.15 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index eaf1f7dbd..e10519792 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.03.15 Core * Fix missing subtitles if --add-metadata is used (#12423) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 09dc830cb..cc0309f97 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -208,6 +208,7 @@ - **Digiteka** - **Discovery** - **DiscoveryGo** + - **DiscoveryGoPlaylist** - **Disney** - **Dotsub** - **DouyuTV**: 斗鱼 diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d74046b37..cbe686517 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.03.10' +__version__ = '2017.03.15' From ba448445b8baa66de92c65793f7ecba8927f0ce8 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 15 Mar 2017 01:40:54 +0100 Subject: [PATCH 0486/1696] [redbull] improve extraction - extract 1080p quality - correct ttml subtitle ext - catch api errors - reduce request size --- youtube_dl/extractor/redbulltv.py | 62 +++++++++++++++++++------------ 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/redbulltv.py b/youtube_dl/extractor/redbulltv.py index 5c73d5bca..afab62426 100644 --- a/youtube_dl/extractor/redbulltv.py +++ b/youtube_dl/extractor/redbulltv.py @@ -2,11 +2,13 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_HTTPError from ..utils import ( float_or_none, int_or_none, try_get, - unified_timestamp, + # unified_timestamp, + ExtractorError, ) @@ -15,15 +17,15 @@ class RedBullTVIE(InfoExtractor): _TESTS = [{ # film 'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc', - 'md5': '78e860f631d7a846e712fab8c5fe2c38', + 'md5': 'fb0445b98aa4394e504b413d98031d1f', 'info_dict': { 'id': 'AP-1Q756YYX51W11', 'ext': 'mp4', 'title': 'ABC of...WRC', 'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31', 'duration': 1582.04, - 'timestamp': 1488405786, - 'upload_date': '20170301', + # 'timestamp': 1488405786, + # 'upload_date': '20170301', }, }, { # episode @@ -34,8 +36,8 @@ class RedBullTVIE(InfoExtractor): 'title': 'Grime - Hashtags S2 E4', 'description': 'md5:334b741c8c1ce65be057eab6773c1cf5', 'duration': 904.6, - 'timestamp': 1487290093, - 'upload_date': '20170217', + # 'timestamp': 1487290093, + # 'upload_date': '20170217', 'series': 'Hashtags', 'season_number': 2, 'episode_number': 4, @@ -48,29 +50,40 @@ class RedBullTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - access_token = self._download_json( - 'https://api-v2.redbull.tv/start', video_id, + session = self._download_json( + 
'https://api-v2.redbull.tv/session', video_id, note='Downloading access token', query={ - 'build': '4.0.9', - 'category': 'smartphone', - 'os_version': 23, - 'os_family': 'android', - })['auth']['access_token'] + 'build': '4.370.0', + 'category': 'personal_computer', + 'os_version': '1.0', + 'os_family': 'http', + }) + if session.get('code') == 'error': + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, session['message'])) + auth = '%s %s' % (session.get('token_type', 'Bearer'), session['access_token']) - info = self._download_json( - 'https://api-v2.redbull.tv/views/%s' % video_id, - video_id, note='Downloading video information', - headers={'Authorization': 'Bearer ' + access_token} - )['blocks'][0]['top'][0] + try: + info = self._download_json( + 'https://api-v2.redbull.tv/content/%s' % video_id, + video_id, note='Downloading video information', + headers={'Authorization': auth} + ) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: + error_message = self._parse_json( + e.cause.read().decode(), video_id)['message'] + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, error_message), expected=True) + raise video = info['video_product'] title = info['title'].strip() - m3u8_url = video['url'] formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') + video['url'], video_id, 'mp4', 'm3u8_native') + self._sort_formats(formats) subtitles = {} for _, captions in (try_get( @@ -82,9 +95,12 @@ class RedBullTVIE(InfoExtractor): caption_url = caption.get('url') if not caption_url: continue + ext = caption.get('format') + if ext == 'xml': + ext = 'ttml' subtitles.setdefault(caption.get('lang') or 'en', []).append({ 'url': caption_url, - 'ext': caption.get('format'), + 'ext': ext, }) subheading = info.get('subheading') @@ -97,7 +113,7 @@ class RedBullTVIE(InfoExtractor): 'description': info.get('long_description') or info.get( 'short_description'), 'duration': float_or_none(video.get('duration'), scale=1000), - 'timestamp': unified_timestamp(info.get('published')), + # 'timestamp': unified_timestamp(info.get('published')), 'series': info.get('show_title'), 'season_number': int_or_none(info.get('season_number')), 'episode_number': int_or_none(info.get('episode_number')), From a3096842856d0471b435fb0a85b295da7c4bcf7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 16 Mar 2017 03:28:01 +0700 Subject: [PATCH 0487/1696] [extractor/generic] Add forgotten return for jwplayer formats --- youtube_dl/extractor/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ad47c3b6b..0fcb3fdac 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2554,6 +2554,7 @@ class GenericIE(InfoExtractor): jwplayer_data, video_id, require_title=False) if not info.get('title'): info['title'] = video_title + return info except ExtractorError: pass From b51dc9db0e6ffc6a7725d92fa2c5de45a5b1be20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 16 Mar 2017 03:30:53 +0700 Subject: [PATCH 0488/1696] [extractor/common] Extract SMIL formats from jwplayer --- youtube_dl/extractor/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 78dc5be24..b51799bfa 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2247,6 +2247,9 @@ class InfoExtractor(object): elif ext 
== 'mpd': formats.extend(self._extract_mpd_formats( source_url, video_id, mpd_id=mpd_id, fatal=False)) + elif ext == 'smil': + formats.extend(self._extract_smil_formats( + source_url, video_id, fatal=False)) # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 elif source_type.startswith('audio') or ext in ( 'oga', 'aac', 'mp3', 'mpeg', 'vorbis'): From 21bfcd3d6e41aed6113c874533fcfe41eb250d96 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 16 Mar 2017 12:50:45 +0100 Subject: [PATCH 0489/1696] [postprocessor/ffmpeg] Add support for flac Requested at http://stackoverflow.com/q/42828041/35070 --- youtube_dl/__init__.py | 2 +- youtube_dl/postprocessor/ffmpeg.py | 34 +++++++++++++++++++----------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index c482f9375..2f640607f 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -196,7 +196,7 @@ def _real_main(argv=None): if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart: raise ValueError('Playlist end must be greater than playlist start') if opts.extractaudio: - if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']: + if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']: parser.error('invalid audio format specified') if opts.audioquality: opts.audioquality = opts.audioquality.strip('k').strip('K') diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 96ddb3b36..7c162d92a 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -26,15 +26,25 @@ from ..utils import ( EXT_TO_OUT_FORMATS = { - "aac": "adts", - "m4a": "ipod", - "mka": "matroska", - "mkv": "matroska", - "mpg": "mpeg", - "ogv": "ogg", - "ts": "mpegts", - "wma": "asf", - "wmv": "asf", + 'aac': 'adts', + 'flac': 'flac', + 'm4a': 'ipod', + 'mka': 'matroska', + 'mkv': 'matroska', + 'mpg': 'mpeg', + 'ogv': 'ogg', + 'ts': 'mpegts', + 'wma': 'asf', + 'wmv': 'asf', +} +ACODECS = { + 'mp3': 'libmp3lame', + 'aac': 'aac', + 'flac': 'flac', + 'm4a': 'aac', + 'opus': 'opus', + 'vorbis': 'libvorbis', + 'wav': None, } @@ -237,7 +247,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): acodec = 'copy' extension = 'm4a' more_opts = ['-bsf:a', 'aac_adtstoasc'] - elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']: + elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']: # Lossless if possible acodec = 'copy' extension = filecodec @@ -256,8 +266,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): else: more_opts += ['-b:a', self._preferredquality + 'k'] else: - # We convert the audio (lossy) - acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec] + # We convert the audio (lossy if codec is lossy) + acodec = ACODECS[self._preferredcodec] extension = self._preferredcodec more_opts = [] if self._preferredquality is not None: From 0efbc6b56d2b030e5dc98fa7f533a2e6cd41cf30 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 16 Mar 2017 12:54:47 +0100 Subject: [PATCH 0490/1696] [options] Mention flac support and sort alphabetically among the audio formats --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 8b51d3c6f..6b811535f 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -773,7 +773,7 @@ def parseOpts(overrideArguments=None): 
help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') postproc.add_option( '--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default; No effect without -x') + help='Specify audio format: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "%default" by default; No effect without -x') postproc.add_option( '--audio-quality', metavar='QUALITY', dest='audioquality', default='5', From 6ad476079db0dd806877cac1b73232d0ae16d50f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 16 Mar 2017 22:39:48 +0700 Subject: [PATCH 0491/1696] [ChangeLog] Actualize --- ChangeLog | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ChangeLog b/ChangeLog index e10519792..da64f97ea 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +version + +Core ++ [postprocessor/ffmpeg] Add support for flac ++ [extractor/common] Extract SMIL formats from jwplayer + +Extractors ++ [generic] Add forgotten return for jwplayer formats +* [redbulltv] Improve extraction + + version 2017.03.15 Core From 7d539ee10a8b0aeefb408ece19ce543f363006bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 16 Mar 2017 22:42:12 +0700 Subject: [PATCH 0492/1696] release 2017.03.16 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 5 +++-- youtube_dl/version.py | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index cd8592775..0e94b6cde 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.15** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.03.15 +[debug] youtube-dl version 2017.03.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index da64f97ea..75a8bd7a6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2017.03.16 Core + [postprocessor/ffmpeg] Add support for flac diff --git a/README.md b/README.md index 0fc5984dc..86b44781c 100644 --- a/README.md +++ b/README.md @@ -375,8 +375,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo (requires ffmpeg or avconv and ffprobe or avprobe) --audio-format FORMAT Specify audio format: "best", "aac", - "vorbis", "mp3", "m4a", "opus", or "wav"; - "best" by default; No effect without -x + "flac", "mp3", "m4a", "opus", "vorbis", or + "wav"; "best" by default; No effect without + -x --audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K diff --git a/youtube_dl/version.py b/youtube_dl/version.py index cbe686517..f38f130bf 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.03.15' +__version__ = '2017.03.16' From 7f3590c43b8ae5cdba3c63e35e786083e3589485 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 17 Mar 2017 00:00:01 +0700 Subject: [PATCH 0493/1696] [test_InfoExtractor] Add some realworld tests for _extract_jwplayer_data --- test/test_InfoExtractor.py | 93 +++++++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 437c7270e..881197afb 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -8,7 +8,7 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import FakeYDL +from test.helper import FakeYDL, expect_dict from youtube_dl.extractor.common import InfoExtractor from youtube_dl.extractor import YoutubeIE, get_info_extractor from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError @@ -84,6 +84,97 @@ class TestInfoExtractor(unittest.TestCase): self.assertRaises(ExtractorError, self.ie._download_json, uri, None) self.assertEqual(self.ie._download_json(uri, None, fatal=False), None) + def test_extract_jwplayer_data_realworld(self): + # from http://www.suffolk.edu/sjc/ + expect_dict( + self, + self.ie._extract_jwplayer_data(r''' + + ''', None, require_title=False), + { + 'id': 'XEgvuql4', + 'formats': [{ + 'url': 'rtmp://192.138.214.154/live/sjclive', + 'ext': 'flv' + }] + }) + + # from https://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary/ + expect_dict( + self, + self.ie._extract_jwplayer_data(r''' + + ''', 'dummy', require_title=False), + 
{ + 'thumbnail': 'https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg', + 'formats': [{ + 'url': 'https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv', + 'ext': 'flv' + }] + }) + + # from http://www.indiedb.com/games/king-machine/videos + expect_dict( + self, + self.ie._extract_jwplayer_data(r''' + + ''', 'dummy'), + { + 'title': 'king machine trailer 1', + 'thumbnail': 'http://media.indiedb.com/cache/images/games/1/50/49678/thumb_620x2000/king-machine-trailer.mp4.jpg', + 'formats': [{ + 'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode_mp4/king-machine-trailer.mp4', + 'height': 360, + 'ext': 'mp4' + }, { + 'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode720p_mp4/king-machine-trailer.mp4', + 'height': 720, + 'ext': 'mp4' + }] + }) + if __name__ == '__main__': unittest.main() From ea883a687c054692fcfe3cea15a22269044b64bb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 17 Mar 2017 15:20:12 +0800 Subject: [PATCH 0494/1696] [openload] Fix extraction (closes #10408) Thanks to @makgun02 Ref: http://pastebin.com/raw/JX9gHFUz --- ChangeLog | 6 +++++ youtube_dl/extractor/openload.py | 43 ++++++++++++++++++++------------ 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/ChangeLog b/ChangeLog index 75a8bd7a6..eeb5813c5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [openload] Fix extraction (#10408) + + version 2017.03.16 Core diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 5ea749f35..fa876b127 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -78,34 +78,45 @@ class OpenloadIE(InfoExtractor): video_url_chars = [] first_char = ord(ol_id[0]) - key = first_char - 50 + key = first_char - 55 maxKey = max(2, key) - key = min(maxKey, len(ol_id) - 22) - t = ol_id[key:key + 20] + key = min(maxKey, len(ol_id) - 26) + t = ol_id[key:key + 24] hashMap = {} - v = ol_id.replace(t, "") + v = ol_id.replace(t, '') h = 0 while h < len(t): - f = t[h:h + 2] - i = int(f, 16) - hashMap[h / 2] = i - h += 2 + f = t[h:h + 3] + i = int(f, 8) + hashMap[h / 3] = i + h += 3 h = 0 - + H = 0 while h < len(v): - B = v[h:h + 3] + B = '' + C = '' + if len(v) >= h + 2: + B = v[h:h + 2] + if len(v) >= h + 3: + C = v[h:h + 3] i = int(B, 16) - if (h / 3) % 3 == 0: - i = int(B, 8) - index = (h / 3) % 10 + h += 2 + if H % 3 == 0: + i = int(C, 8) + h += 1 + elif H % 2 == 0 and H != 0 and ord(v[H - 1]) < 60: + i = int(C, 10) + h += 1 + index = H % 8 + A = hashMap[index] - i = i ^ 47 - i = i ^ A + i ^= 213 + i ^= A video_url_chars.append(compat_chr(i)) - h += 3 + H += 1 video_url = 'https://openload.co/stream/%s?mime=true' video_url = video_url % (''.join(video_url_chars)) From 3e5856d860bb94b4dbe2fa38d9c50a6a92bb7401 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 17 Mar 2017 09:53:44 +0100 Subject: [PATCH 0495/1696] [discoverynetworks] add support for more domains and bypass geo restiction --- .../{tlc.py => discoverynetworks.py} | 23 +++++++++++++------ youtube_dl/extractor/extractors.py | 2 +- 2 files changed, 17 insertions(+), 8 deletions(-) rename youtube_dl/extractor/{tlc.py => discoverynetworks.py} (64%) diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/discoverynetworks.py similarity index 64% rename from youtube_dl/extractor/tlc.py rename to 
youtube_dl/extractor/discoverynetworks.py index fd145ba42..b6653784c 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/discoverynetworks.py @@ -9,13 +9,13 @@ from ..compat import ( compat_parse_qs, compat_urlparse, ) +from ..utils import smuggle_url -class TlcDeIE(InfoExtractor): - IE_NAME = 'tlc.de' - _VALID_URL = r'https?://(?:www\.)?tlc\.de/(?:[^/]+/)*videos/(?P[^/?#]+)?(?:.*#(?P<id>\d+))?' +class DiscoveryNetworksDeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?:discovery|tlc|animalplanet|dmax)\.de/(?:.*#(?P<id>\d+)|(?:[^/]+/)*videos/(?P<title>[^/?#]+))' - _TEST = { + _TESTS = [{ 'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001', 'info_dict': { 'id': '3235167922001', @@ -29,7 +29,13 @@ class TlcDeIE(InfoExtractor): 'upload_date': '20140404', 'uploader_id': '1659832546', }, - } + }, { + 'url': 'http://www.dmax.de/programme/storage-hunters-uk/videos/storage-hunters-uk-episode-6/', + 'only_matching': True, + }, { + 'url': 'http://www.discovery.de/#5332316765001', + 'only_matching': True, + }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s' def _real_extract(self, url): @@ -39,5 +45,8 @@ class TlcDeIE(InfoExtractor): title = mobj.group('title') webpage = self._download_webpage(url, title) brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) - brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0] - return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) + brightcove_id = compat_parse_qs(compat_urlparse.urlparse( + brightcove_legacy_url).query)['@videoPlayer'][0] + return self.url_result(smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['DE']}), + 'BrightcoveNew', brightcove_id) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 24c478932..79405b468 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -269,6 +269,7 @@ from .discoverygo import ( DiscoveryGoIE, DiscoveryGoPlaylistIE, ) +from .discoverynetworks import DiscoveryNetworksDeIE from .disney import DisneyIE from .dispeak import DigitallySpeakingIE from .dropbox import DropboxIE @@ -973,7 +974,6 @@ from .thisav import ThisAVIE from .thisoldhouse import ThisOldHouseIE from .threeqsdn import ThreeQSDNIE from .tinypic import TinyPicIE -from .tlc import TlcDeIE from .tmz import ( TMZIE, TMZArticleIE, From e7a51a4c0235fafefc672d753017c770a306677a Mon Sep 17 00:00:00 2001 From: mrBliss <dewinant@gmail.com> Date: Tue, 31 Jan 2017 13:59:18 +0100 Subject: [PATCH 0496/1696] [vtm] Add extractor (closes #9974) Implementation of the approach described in #9974. 
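For reference, the approach amounts to two HTTP round trips: log in against the Gigya SSO used by vtm.be/q2.be to obtain a signed UID, then hand that signature to the Medialaan VOD API, which returns the HLS manifest URL. Below is a minimal standalone sketch of that flow; the endpoints, API key and parameter names are the ones used by the extractor added in this patch, while the use of the third-party requests library is only for illustration and is not part of youtube-dl itself.

    import requests

    GIGYA_API_KEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'

    def fetch_hls_url(username, password, video_id, app_id='vtm_watch'):
        # Step 1: Gigya login; a successful response carries UID,
        # UIDSignature and signatureTimestamp.
        auth = requests.post(
            'https://accounts.eu1.gigya.com/accounts.login',
            data={
                'APIKey': GIGYA_API_KEY,
                'sdk': 'js_6.1',
                'format': 'json',
                'loginID': username,
                'password': password,
            }).json()
        if auth.get('errorDetails'):
            raise RuntimeError('Unable to log in: %s' % auth['errorDetails'])

        # Step 2: exchange the signed UID for the stream manifest.
        # app_id is 'vtm_watch' for vtm.be and 'q2' for q2.be.
        data = requests.get(
            'http://vod.medialaan.io/api/1.0/item/%s/video' % video_id,
            params={
                'app_id': app_id,
                'user_network': 'vtm-sso',
                'UID': auth['UID'],
                'UIDSignature': auth['UIDSignature'],
                'signatureTimestamp': auth['signatureTimestamp'],
            }).json()
        return data['response']['uri']  # m3u8 (HLS) URL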
--- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vtm.py | 136 +++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 youtube_dl/extractor/vtm.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 79405b468..64316d4a8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1174,6 +1174,7 @@ from .voxmedia import VoxMediaIE from .vporn import VpornIE from .vrt import VRTIE from .vrak import VrakIE +from .vtm import VTMIE from .vube import VubeIE from .vuclip import VuClipIE from .vvvvid import VVVVIDIE diff --git a/youtube_dl/extractor/vtm.py b/youtube_dl/extractor/vtm.py new file mode 100644 index 000000000..f0a70040b --- /dev/null +++ b/youtube_dl/extractor/vtm.py @@ -0,0 +1,136 @@ +from __future__ import unicode_literals + +import re + +from .generic import GenericIE +from .common import InfoExtractor +from ..utils import ( + urlencode_postdata, + compat_urllib_parse_urlencode, + ExtractorError, + remove_end, +) + + +class VTMIE(InfoExtractor): + """Download full episodes that require an account from vtm.be or q2.be. + + The generic extractor can be used to download clips that do no require an + account. + """ + _VALID_URL = r'https?://(?:www\.)?(?P<site_id>vtm|q2)\.be/video[/?].+?' + _NETRC_MACHINE = 'vtm' + _APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-' + _TESTS = [ + { + 'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch', + 'info_dict': { + 'id': 'vtm_20170219_VM0678361_vtmwatch', + 'ext': 'mp4', + 'title': 'Allemaal Chris afl. 6', + 'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2', + }, + 'skip_download': True, + }, + { + 'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000', + 'only_matching': True, + }, + { + 'url': 'http://vtm.be/video?aid=163157', + 'only_matching': True, + }, + { + 'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2', + 'only_matching': True, + }, + { + 'url': 'http://vtm.be/video?aid=168332', + 'info_dict': { + 'id': 'video?aid=168332', + 'ext': 'mp4', + 'title': 'Videozone', + }, + }, + ] + + def _real_initialize(self): + self._logged_in = False + + def _login(self): + (username, password) = self._get_login_info() + if username is None or password is None: + self.raise_login_required() + + auth_data = { + 'APIKey': self._APIKEY, + 'sdk': 'js_6.1', + 'format': 'json', + 'loginID': username, + 'password': password, + } + + auth_info = self._download_json( + 'https://accounts.eu1.gigya.com/accounts.login', None, + note='Logging in', errnote='Unable to log in', + data=urlencode_postdata(auth_data), fatal=True) + + error_message = auth_info.get('errorDetails') + if error_message: + raise ExtractorError( + 'Unable to login: %s' % error_message, expected=True) + + self._uid = auth_info['UID'] + self._uid_signature = auth_info['UIDSignature'] + self._signature_timestamp = auth_info['signatureTimestamp'] + + self._logged_in = True + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + site_id = mobj.group('site_id') + + webpage = self._download_webpage(url, None, "Downloading webpage") + + # The URL sometimes contains the video id, but not always, e.g., test + # case 3. Fortunately, all webpages of videos requiring authentication + # contain the video id. 
+ video_id = self._search_regex( + r'\\"vodId\\":\\"(.+?)\\"', webpage, 'video_id', default=None) + + # It was most likely a video not requiring authentication. + if not video_id: + return self.url_result(url, 'Generic') + + if not self._logged_in: + self._login() + + title = self._html_search_regex( + r'\\"title\\":\\"(.+?)\\"', webpage, 'title', default=None) + + description = self._html_search_regex( + r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>', + webpage, 'description', default=None) + + data_url = 'http://vod.medialaan.io/api/1.0/item/%s/video' % video_id + m3u8_data = { + 'app_id': 'vtm_watch' if site_id == 'vtm' else 'q2', + 'user_network': 'vtm-sso', + 'UID': self._uid, + 'UIDSignature': self._uid_signature, + 'signatureTimestamp': self._signature_timestamp, + } + data = self._download_json(data_url, video_id, query=m3u8_data) + + formats = self._extract_m3u8_formats( + data['response']['uri'], video_id, entry_protocol='m3u8_native', + ext='mp4', m3u8_id='hls') + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'formats': formats, + } From 2a721cdff2da0a9267c96ff2f4c19cda4ce0ab83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Mar 2017 05:58:54 +0700 Subject: [PATCH 0497/1696] [medialaan] Fix and improve extraction (closes #11912) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/medialaan.py | 263 +++++++++++++++++++++++++++++ youtube_dl/extractor/vtm.py | 136 --------------- 3 files changed, 264 insertions(+), 137 deletions(-) create mode 100644 youtube_dl/extractor/medialaan.py delete mode 100644 youtube_dl/extractor/vtm.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 64316d4a8..6b4742ed8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1174,7 +1174,7 @@ from .voxmedia import VoxMediaIE from .vporn import VpornIE from .vrt import VRTIE from .vrak import VrakIE -from .vtm import VTMIE +from .medialaan import MedialaanIE from .vube import VubeIE from .vuclip import VuClipIE from .vvvvid import VVVVIDIE diff --git a/youtube_dl/extractor/medialaan.py b/youtube_dl/extractor/medialaan.py new file mode 100644 index 000000000..e70d4679d --- /dev/null +++ b/youtube_dl/extractor/medialaan.py @@ -0,0 +1,263 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + ExtractorError, + int_or_none, + parse_duration, + try_get, + unified_timestamp, + urlencode_postdata, +) + + +class MedialaanIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? + (?: + (?P<site_id>vtm|q2|vtmkzoom)\.be/ + (?: + video(?:/[^/]+/id/|/?\?.*?\baid=)| + (?:[^/]+/)* + ) + ) + (?P<id>[^/?#&]+) + ''' + _NETRC_MACHINE = 'medialaan' + _APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-' + _SITE_TO_APP_ID = { + 'vtm': 'vtm_watch', + 'q2': 'q2', + 'vtmkzoom': 'vtmkzoom', + } + _TESTS = [{ + # vod + 'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch', + 'info_dict': { + 'id': 'vtm_20170219_VM0678361_vtmwatch', + 'ext': 'mp4', + 'title': 'Allemaal Chris afl. 
6', + 'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2', + 'timestamp': 1487533280, + 'upload_date': '20170219', + 'duration': 2562, + 'series': 'Allemaal Chris', + 'season': 'Allemaal Chris', + 'season_number': 1, + 'season_id': '256936078124527', + 'episode': 'Allemaal Chris afl. 6', + 'episode_number': 6, + 'episode_id': '256936078591527', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Requires account credentials', + }, { + # clip + 'url': 'http://vtm.be/video?aid=168332', + 'info_dict': { + 'id': '168332', + 'ext': 'mp4', + 'title': '"Veronique liegt!"', + 'description': 'md5:1385e2b743923afe54ba4adc38476155', + 'timestamp': 1489002029, + 'upload_date': '20170308', + 'duration': 96, + }, + }, { + # vod + 'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000', + 'only_matching': True, + }, { + # vod + 'url': 'http://vtm.be/video?aid=163157', + 'only_matching': True, + }, { + # vod + 'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2', + 'only_matching': True, + }, { + # clip + 'url': 'http://vitaya.be/de-jurk/precies-je-hebt-geen-borsten', + 'only_matching': True, + }, { + # clip + 'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio', + 'only_matching': True, + }] + + def _real_initialize(self): + self._logged_in = False + + def _login(self): + username, password = self._get_login_info() + if username is None: + self.raise_login_required() + + auth_data = { + 'APIKey': self._APIKEY, + 'sdk': 'js_6.1', + 'format': 'json', + 'loginID': username, + 'password': password, + } + + auth_info = self._download_json( + 'https://accounts.eu1.gigya.com/accounts.login', None, + note='Logging in', errnote='Unable to log in', + data=urlencode_postdata(auth_data)) + + error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage') + if error_message: + raise ExtractorError( + 'Unable to login: %s' % error_message, expected=True) + + self._uid = auth_info['UID'] + self._uid_signature = auth_info['UIDSignature'] + self._signature_timestamp = auth_info['signatureTimestamp'] + + self._logged_in = True + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id, site_id = mobj.group('id', 'site_id') + + webpage = self._download_webpage(url, video_id) + + config = self._parse_json( + self._search_regex( + r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);', + webpage, 'config', default='{}'), video_id, + transform_source=lambda s: s.replace( + '\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'")) + + vod_id = config.get('vodId') or self._search_regex( + (r'\\"vodId\\"\s*:\s*\\"(.+?)\\"', + r'<[^>]+id=["\']vod-(\d+)'), + webpage, 'video_id', default=None) + + # clip, no authentication required + if not vod_id: + player = self._parse_json( + self._search_regex( + r'vmmaplayer\(({.+?})\);', webpage, 'vmma player', + default=''), + video_id, transform_source=lambda s: '[%s]' % s, fatal=False) + if player: + video = player[-1] + info = { + 'id': video_id, + 'url': video['videoUrl'], + 'title': video['title'], + 'thumbnail': video.get('imageUrl'), + 'timestamp': int_or_none(video.get('createdDate')), + 'duration': int_or_none(video.get('duration')), + } + else: + info = self._parse_html5_media_entries( + url, webpage, video_id, m3u8_id='hls')[0] + info.update({ + 'id': video_id, + 'title': self._html_search_meta('description', webpage), + 'duration': parse_duration(self._html_search_meta('duration', webpage)), + }) + # vod, authentication required + else: + if not self._logged_in: + 
self._login() + + settings = self._parse_json( + self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings', default='{}'), + video_id) + + def get(container, item): + return try_get( + settings, lambda x: x[container][item], + compat_str) or self._search_regex( + r'"%s"\s*:\s*"([^"]+)' % item, webpage, item, + default=None) + + app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch') + sso = get('vod', 'gigyaDatabase') or 'vtm-sso' + + data = self._download_json( + 'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id, + video_id, query={ + 'app_id': app_id, + 'user_network': sso, + 'UID': self._uid, + 'UIDSignature': self._uid_signature, + 'signatureTimestamp': self._signature_timestamp, + }) + + formats = self._extract_m3u8_formats( + data['response']['uri'], video_id, entry_protocol='m3u8_native', + ext='mp4', m3u8_id='hls') + + self._sort_formats(formats) + + info = { + 'id': vod_id, + 'formats': formats, + } + + api_key = get('vod', 'apiKey') + channel = get('medialaanGigya', 'channel') + + if api_key: + videos = self._download_json( + 'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False, + query={ + 'channels': channel, + 'ids': vod_id, + 'limit': 1, + 'apikey': api_key, + }) + if videos: + video = try_get( + videos, lambda x: x['response']['videos'][0], dict) + if video: + def get(container, item, expected_type=None): + return try_get( + video, lambda x: x[container][item], expected_type) + + def get_string(container, item): + return get(container, item, compat_str) + + info.update({ + 'series': get_string('program', 'title'), + 'season': get_string('season', 'title'), + 'season_number': int_or_none(get('season', 'number')), + 'season_id': get_string('season', 'id'), + 'episode': get_string('episode', 'title'), + 'episode_number': int_or_none(get('episode', 'number')), + 'episode_id': get_string('episode', 'id'), + 'duration': int_or_none( + video.get('duration')) or int_or_none( + video.get('durationMillis'), scale=1000), + 'title': get_string('episode', 'title'), + 'description': get_string('episode', 'text'), + 'timestamp': unified_timestamp(get_string( + 'publication', 'begin')), + }) + + if not info.get('title'): + info['title'] = try_get( + config, lambda x: x['videoConfig']['title'], + compat_str) or self._html_search_regex( + r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title', + default=None) or self._og_search_title(webpage) + + if not info.get('description'): + info['description'] = self._html_search_regex( + r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>', + webpage, 'description', default=None) + + return info diff --git a/youtube_dl/extractor/vtm.py b/youtube_dl/extractor/vtm.py deleted file mode 100644 index f0a70040b..000000000 --- a/youtube_dl/extractor/vtm.py +++ /dev/null @@ -1,136 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .generic import GenericIE -from .common import InfoExtractor -from ..utils import ( - urlencode_postdata, - compat_urllib_parse_urlencode, - ExtractorError, - remove_end, -) - - -class VTMIE(InfoExtractor): - """Download full episodes that require an account from vtm.be or q2.be. - - The generic extractor can be used to download clips that do no require an - account. - """ - _VALID_URL = r'https?://(?:www\.)?(?P<site_id>vtm|q2)\.be/video[/?].+?' 
- _NETRC_MACHINE = 'vtm' - _APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-' - _TESTS = [ - { - 'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch', - 'info_dict': { - 'id': 'vtm_20170219_VM0678361_vtmwatch', - 'ext': 'mp4', - 'title': 'Allemaal Chris afl. 6', - 'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2', - }, - 'skip_download': True, - }, - { - 'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000', - 'only_matching': True, - }, - { - 'url': 'http://vtm.be/video?aid=163157', - 'only_matching': True, - }, - { - 'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2', - 'only_matching': True, - }, - { - 'url': 'http://vtm.be/video?aid=168332', - 'info_dict': { - 'id': 'video?aid=168332', - 'ext': 'mp4', - 'title': 'Videozone', - }, - }, - ] - - def _real_initialize(self): - self._logged_in = False - - def _login(self): - (username, password) = self._get_login_info() - if username is None or password is None: - self.raise_login_required() - - auth_data = { - 'APIKey': self._APIKEY, - 'sdk': 'js_6.1', - 'format': 'json', - 'loginID': username, - 'password': password, - } - - auth_info = self._download_json( - 'https://accounts.eu1.gigya.com/accounts.login', None, - note='Logging in', errnote='Unable to log in', - data=urlencode_postdata(auth_data), fatal=True) - - error_message = auth_info.get('errorDetails') - if error_message: - raise ExtractorError( - 'Unable to login: %s' % error_message, expected=True) - - self._uid = auth_info['UID'] - self._uid_signature = auth_info['UIDSignature'] - self._signature_timestamp = auth_info['signatureTimestamp'] - - self._logged_in = True - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - site_id = mobj.group('site_id') - - webpage = self._download_webpage(url, None, "Downloading webpage") - - # The URL sometimes contains the video id, but not always, e.g., test - # case 3. Fortunately, all webpages of videos requiring authentication - # contain the video id. - video_id = self._search_regex( - r'\\"vodId\\":\\"(.+?)\\"', webpage, 'video_id', default=None) - - # It was most likely a video not requiring authentication. 
- if not video_id: - return self.url_result(url, 'Generic') - - if not self._logged_in: - self._login() - - title = self._html_search_regex( - r'\\"title\\":\\"(.+?)\\"', webpage, 'title', default=None) - - description = self._html_search_regex( - r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>', - webpage, 'description', default=None) - - data_url = 'http://vod.medialaan.io/api/1.0/item/%s/video' % video_id - m3u8_data = { - 'app_id': 'vtm_watch' if site_id == 'vtm' else 'q2', - 'user_network': 'vtm-sso', - 'UID': self._uid, - 'UIDSignature': self._uid_signature, - 'signatureTimestamp': self._signature_timestamp, - } - data = self._download_json(data_url, video_id, query=m3u8_data) - - formats = self._extract_m3u8_formats( - data['response']['uri'], video_id, entry_protocol='m3u8_native', - ext='mp4', m3u8_id='hls') - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'formats': formats, - } From 5f0daab1ca60803f4f49b344ddb3757c418a2d8e Mon Sep 17 00:00:00 2001 From: Vijay Singh <sudovijay@users.noreply.github.com> Date: Sat, 18 Mar 2017 04:32:55 +0530 Subject: [PATCH 0498/1696] [openload] Fix extraction --- youtube_dl/extractor/openload.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index fa876b127..435aec28e 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -80,8 +80,8 @@ class OpenloadIE(InfoExtractor): first_char = ord(ol_id[0]) key = first_char - 55 maxKey = max(2, key) - key = min(maxKey, len(ol_id) - 26) - t = ol_id[key:key + 24] + key = min(maxKey, len(ol_id) - 38) + t = ol_id[key:key + 36] hashMap = {} v = ol_id.replace(t, '') @@ -110,7 +110,7 @@ class OpenloadIE(InfoExtractor): elif H % 2 == 0 and H != 0 and ord(v[H - 1]) < 60: i = int(C, 10) h += 1 - index = H % 8 + index = H % 12 A = hashMap[index] i ^= 213 From febfe1e2626bab5dbb8d4e0bbe31aa225ce09d35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Mar 2017 06:19:11 +0700 Subject: [PATCH 0499/1696] [adobepass] Detect and output error on authz token extraction (#12472) --- youtube_dl/extractor/adobepass.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index d4816abf5..1b2d364ca 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -1458,6 +1458,8 @@ class AdobePassIE(InfoExtractor): self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) count += 1 continue + if '<error' in authorize: + raise ExtractorError(xml_text(authorize, 'details'), expected=True) authz_token = unescapeHTML(xml_text(authorize, 'authzToken')) requestor_info[guid] = authz_token self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info) From f68ef1e2abd876ffca65544fd3e42756f9c33be3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Mar 2017 23:23:47 +0700 Subject: [PATCH 0500/1696] [medialaan] Remove unrelated test --- youtube_dl/extractor/medialaan.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/youtube_dl/extractor/medialaan.py b/youtube_dl/extractor/medialaan.py index e70d4679d..6e067474b 100644 --- a/youtube_dl/extractor/medialaan.py +++ b/youtube_dl/extractor/medialaan.py @@ -81,10 +81,6 @@ class MedialaanIE(InfoExtractor): # vod 'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2', 
'only_matching': True, - }, { - # clip - 'url': 'http://vitaya.be/de-jurk/precies-je-hebt-geen-borsten', - 'only_matching': True, }, { # clip 'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio', From 772b5ff57f702dd76986d1db17068da2116a2800 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 19 Mar 2017 00:45:04 +0100 Subject: [PATCH 0501/1696] [toongoggles] Add new extractor(closes #12171) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/toongoggles.py | 81 +++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 youtube_dl/extractor/toongoggles.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6b4742ed8..97d68d9ca 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -986,6 +986,7 @@ from .tnaflix import ( ) from .toggle import ToggleIE from .tonline import TOnlineIE +from .toongoggles import ToonGogglesIE from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE diff --git a/youtube_dl/extractor/toongoggles.py b/youtube_dl/extractor/toongoggles.py new file mode 100644 index 000000000..b5ba1c01d --- /dev/null +++ b/youtube_dl/extractor/toongoggles.py @@ -0,0 +1,81 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_duration, +) + + +class ToonGogglesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?toongoggles\.com/shows/(?P<show_id>\d+)(?:/[^/]+/episodes/(?P<episode_id>\d+))?' + _TESTS = [{ + 'url': 'http://www.toongoggles.com/shows/217143/bernard-season-2/episodes/217147/football', + 'md5': '18289fc2b951eff6b953a9d8f01e6831', + 'info_dict': { + 'id': '217147', + 'ext': 'mp4', + 'title': 'Football', + 'uploader_id': '1', + 'description': 'Bernard decides to play football in order to be better than Lloyd and tries to beat him no matter how, he even cheats.', + 'upload_date': '20160718', + 'timestamp': 1468879330, + } + }, { + 'url': 'http://www.toongoggles.com/shows/227759/om-nom-stories-around-the-world', + 'info_dict': { + 'id': '227759', + 'title': 'Om Nom Stories Around The World', + }, + 'playlist_mincount': 11, + }] + + def _call_api(self, action, page_id, query): + query.update({ + 'for_ng': 1, + 'for_web': 1, + 'show_meta': 1, + 'version': 7.0, + }) + return self._download_json('http://api.toongoggles.com/' + action, page_id, query=query) + + def _parse_episode_data(self, episode_data): + title = episode_data['episode_name'] + + return { + '_type': 'url_transparent', + 'id': episode_data['episode_id'], + 'title': title, + 'url': 'kaltura:513551:' + episode_data['entry_id'], + 'thumbnail': episode_data.get('thumbnail_url'), + 'description': episode_data.get('description'), + 'duration': parse_duration(episode_data.get('hms')), + 'series': episode_data.get('show_name'), + 'season_number': int_or_none(episode_data.get('season_num')), + 'episode_id': episode_data.get('episode_id'), + 'episode': title, + 'episode_number': int_or_none(episode_data.get('episode_num')), + 'categories': episode_data.get('categories'), + 'ie_key': 'Kaltura', + } + + def _real_extract(self, url): + show_id, episode_id = re.match(self._VALID_URL, url).groups() + if episode_id: + episode_data = self._call_api('search', episode_id, { + 'filter': 'episode', + 'id': episode_id, + })['objects'][0] + return self._parse_episode_data(episode_data) + else: + show_data = 
self._call_api('getepisodesbyshow', show_id, { + 'max': 1000000000, + 'showid': show_id, + }) + entries = [] + for episode_data in show_data.get('objects', []): + entries.append(self._parse_episode_data(episode_data)) + return self.playlist_result(entries, show_id, show_data.get('show_name')) From 46b18f2349670d395b9d84a57ee3d9b5d221ff4b Mon Sep 17 00:00:00 2001 From: John Hawkinson <jhawk@mit.edu> Date: Wed, 8 Mar 2017 18:13:54 -0500 Subject: [PATCH 0502/1696] [BostonGlobe] New. Nonstandard version of Brightcove. Has a "data-brightcove-video-id" instead of a "data-video-id," otherwise pretty much just Brightcove. Except the Globe isn't all Brightcove videos, so fallback to Generic, too. Also, abstract playlist_from_matches() from generic.py to common.py, and use it here. History of these changes can be found in 51170427d4b1143572a498dedaee61863a5b2c5b. --- youtube_dl/extractor/bostonglobe.py | 72 +++++++++++++++++++++++++++ youtube_dl/extractor/common.py | 28 +++++++---- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 77 +++++++++++++---------------- 4 files changed, 126 insertions(+), 52 deletions(-) create mode 100644 youtube_dl/extractor/bostonglobe.py diff --git a/youtube_dl/extractor/bostonglobe.py b/youtube_dl/extractor/bostonglobe.py new file mode 100644 index 000000000..57882fbee --- /dev/null +++ b/youtube_dl/extractor/bostonglobe.py @@ -0,0 +1,72 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +from ..utils import ( + extract_attributes, +) + + +class BostonGlobeIE(InfoExtractor): + _VALID_URL = r'(?i)https?://(?:www\.)?bostonglobe\.com/.*/(?P<id>[^/]+)/\w+(?:\.html)?' + _TESTS = [ + { + 'url': 'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html', + 'md5': '0a62181079c85c2d2b618c9a738aedaf', + 'info_dict': { + 'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood', + 'id': '5320421710001', + 'ext': 'mp4', + 'description': 'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.', + 'timestamp': 1486877593, + 'upload_date': '20170212', + 'uploader_id': '245991542', + }, + }, + { + # Embedded youtube video; we hand it off to the Generic extractor. 
+ 'url': 'https://www.bostonglobe.com/lifestyle/names/2017/02/17/does-ben-affleck-play-matt-damon-favorite-version-batman/ruqkc9VxKBYmh5txn1XhSI/story.html', + 'md5': '582b40327089d5c0c949b3c54b13c24b', + 'info_dict': { + 'title': "Who Is Matt Damon's Favorite Batman?", + 'id': 'ZW1QCnlA6Qc', + 'ext': 'mp4', + 'upload_date': '20170217', + 'description': 'md5:3b3dccb9375867e0b4d527ed87d307cb', + 'uploader': 'The Late Late Show with James Corden', + 'uploader_id': 'TheLateLateShow', + }, + 'expected_warnings': ['404'], + }, + ] + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + + page_title = self._og_search_title(webpage, default=None) + + # <video data-brightcove-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject"> + entries = [] + for video in re.findall(r'(?i)(<video[^>]+>)', webpage): + attrs = extract_attributes(video) + + video_id = attrs.get('data-brightcove-video-id') + account_id = attrs.get('data-account') + player_id = attrs.get('data-player') + embed = attrs.get('data-embed') + + if video_id and account_id and player_id and embed: + entries.append( + 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' + % (account_id, player_id, embed, video_id)) + + if len(entries) == 0: + return self.url_result(url, 'Generic') + elif len(entries) == 1: + return self.url_result(entries[0], 'BrightcoveNew') + else: + return self.playlist_from_matches(entries, page_id, page_title, ie='BrightcoveNew') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b51799bfa..0852b8e8c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -36,34 +36,35 @@ from ..utils import ( clean_html, compiled_regex_type, determine_ext, + determine_protocol, error_to_compat_str, ExtractorError, + extract_attributes, fix_xml_ampersands, float_or_none, GeoRestrictedError, GeoUtils, int_or_none, js_to_json, + mimetype2ext, + orderedSet, + parse_codecs, + parse_duration, parse_iso8601, + parse_m3u8_attributes, RegexNotFoundError, - sanitize_filename, sanitized_Request, + sanitize_filename, unescapeHTML, unified_strdate, unified_timestamp, + update_Request, + update_url_query, + urljoin, url_basename, xpath_element, xpath_text, xpath_with_ns, - determine_protocol, - parse_duration, - mimetype2ext, - update_Request, - update_url_query, - parse_m3u8_attributes, - extract_attributes, - parse_codecs, - urljoin, ) @@ -714,6 +715,13 @@ class InfoExtractor(object): video_info['title'] = video_title return video_info + def playlist_from_matches(self, matches, video_id, video_title, getter=None, ie=None): + urlrs = orderedSet( + self.url_result(self._proto_relative_url(getter(m) if getter else m), ie) + for m in matches) + return self.playlist_result( + urlrs, playlist_id=video_id, playlist_title=video_title) + @staticmethod def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None): """Returns a playlist""" diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 97d68d9ca..40a5c9842 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -117,6 +117,7 @@ from .bleacherreport import ( from .blinkx import BlinkxIE from .bloomberg import BloombergIE from .bokecc import BokeCCIE +from .bostonglobe import BostonGlobeIE from .bpb import BpbIE from .br import BRIE from .bravotv import 
BravoTVIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0fcb3fdac..a71d6bac0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1841,14 +1841,6 @@ class GenericIE(InfoExtractor): video_description = self._og_search_description(webpage, default=None) video_thumbnail = self._og_search_thumbnail(webpage, default=None) - # Helper method - def _playlist_from_matches(matches, getter=None, ie=None): - urlrs = orderedSet( - self.url_result(self._proto_relative_url(getter(m) if getter else m), ie) - for m in matches) - return self.playlist_result( - urlrs, playlist_id=video_id, playlist_title=video_title) - # Look for Brightcove Legacy Studio embeds bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage) if bc_urls: @@ -1869,28 +1861,28 @@ class GenericIE(InfoExtractor): # Look for Brightcove New Studio embeds bc_urls = BrightcoveNewIE._extract_urls(webpage) if bc_urls: - return _playlist_from_matches(bc_urls, ie='BrightcoveNew') + return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew') # Look for ThePlatform embeds tp_urls = ThePlatformIE._extract_urls(webpage) if tp_urls: - return _playlist_from_matches(tp_urls, ie='ThePlatform') + return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform') # Look for Vessel embeds vessel_urls = VesselIE._extract_urls(webpage) if vessel_urls: - return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key()) + return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key()) # Look for embedded rtl.nl player matches = re.findall( r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', webpage) if matches: - return _playlist_from_matches(matches, ie='RtlNl') + return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl') vimeo_urls = VimeoIE._extract_urls(url, webpage) if vimeo_urls: - return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key()) + return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key()) vid_me_embed_url = self._search_regex( r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]', @@ -1912,25 +1904,25 @@ class GenericIE(InfoExtractor): (?:embed|v|p)/.+?) 
\1''', webpage) if matches: - return _playlist_from_matches( - matches, lambda m: unescapeHTML(m[1])) + return self.playlist_from_matches( + matches, video_id, video_title, lambda m: unescapeHTML(m[1])) # Look for lazyYT YouTube embed matches = re.findall( r'class="lazyYT" data-youtube-id="([^"]+)"', webpage) if matches: - return _playlist_from_matches(matches, lambda m: unescapeHTML(m)) + return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m)) # Look for Wordpress "YouTube Video Importer" plugin matches = re.findall(r'''(?x)<div[^>]+ class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+ data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage) if matches: - return _playlist_from_matches(matches, lambda m: m[-1]) + return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1]) matches = DailymotionIE._extract_urls(webpage) if matches: - return _playlist_from_matches(matches) + return self.playlist_from_matches(matches, video_id, video_title) # Look for embedded Dailymotion playlist player (#3822) m = re.search( @@ -1939,8 +1931,8 @@ class GenericIE(InfoExtractor): playlists = re.findall( r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url'))) if playlists: - return _playlist_from_matches( - playlists, lambda p: '//dailymotion.com/playlist/%s' % p) + return self.playlist_from_matches( + playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p) # Look for embedded Wistia player match = re.search( @@ -2047,8 +2039,9 @@ class GenericIE(InfoExtractor): if mobj is not None: embeds = self._parse_json(mobj.group(1), video_id, fatal=False) if embeds: - return _playlist_from_matches( - embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala') + return self.playlist_from_matches( + embeds, video_id, video_title, + getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala') # Look for Aparat videos mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage) @@ -2110,13 +2103,13 @@ class GenericIE(InfoExtractor): # Look for funnyordie embed matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage) if matches: - return _playlist_from_matches( - matches, getter=unescapeHTML, ie='FunnyOrDie') + return self.playlist_from_matches( + matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie') # Look for BBC iPlayer embed matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage) if matches: - return _playlist_from_matches(matches, ie='BBCCoUk') + return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk') # Look for embedded RUTV player rutv_url = RUTVIE._extract_url(webpage) @@ -2131,32 +2124,32 @@ class GenericIE(InfoExtractor): # Look for embedded SportBox player sportbox_urls = SportBoxEmbedIE._extract_urls(webpage) if sportbox_urls: - return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed') + return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed') # Look for embedded XHamster player xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) if xhamster_urls: - return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed') + return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed') # Look for embedded TNAFlixNetwork player tnaflix_urls = 
TNAFlixNetworkEmbedIE._extract_urls(webpage) if tnaflix_urls: - return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key()) + return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key()) # Look for embedded PornHub player pornhub_urls = PornHubIE._extract_urls(webpage) if pornhub_urls: - return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key()) + return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key()) # Look for embedded DrTuber player drtuber_urls = DrTuberIE._extract_urls(webpage) if drtuber_urls: - return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key()) + return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key()) # Look for embedded RedTube player redtube_urls = RedTubeIE._extract_urls(webpage) if redtube_urls: - return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key()) + return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key()) # Look for embedded Tvigle player mobj = re.search( @@ -2202,12 +2195,12 @@ class GenericIE(InfoExtractor): # Look for embedded soundcloud player soundcloud_urls = SoundcloudIE._extract_urls(webpage) if soundcloud_urls: - return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key()) + return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key()) # Look for tunein player tunein_urls = TuneInBaseIE._extract_urls(webpage) if tunein_urls: - return _playlist_from_matches(tunein_urls) + return self.playlist_from_matches(tunein_urls, video_id, video_title) # Look for embedded mtvservices player mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage) @@ -2490,35 +2483,35 @@ class GenericIE(InfoExtractor): # Look for DBTV embeds dbtv_urls = DBTVIE._extract_urls(webpage) if dbtv_urls: - return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key()) + return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key()) # Look for Videa embeds videa_urls = VideaIE._extract_urls(webpage) if videa_urls: - return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key()) + return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key()) # Look for 20 minuten embeds twentymin_urls = TwentyMinutenIE._extract_urls(webpage) if twentymin_urls: - return _playlist_from_matches( - twentymin_urls, ie=TwentyMinutenIE.ie_key()) + return self.playlist_from_matches( + twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key()) # Look for Openload embeds openload_urls = OpenloadIE._extract_urls(webpage) if openload_urls: - return _playlist_from_matches( - openload_urls, ie=OpenloadIE.ie_key()) + return self.playlist_from_matches( + openload_urls, video_id, video_title, ie=OpenloadIE.ie_key()) # Look for VideoPress embeds videopress_urls = VideoPressIE._extract_urls(webpage) if videopress_urls: - return _playlist_from_matches( - videopress_urls, ie=VideoPressIE.ie_key()) + return self.playlist_from_matches( + videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key()) # Look for Rutube embeds rutube_urls = RutubeIE._extract_urls(webpage) if rutube_urls: - return _playlist_from_matches( + return self.playlist_from_matches( rutube_urls, ie=RutubeIE.ie_key()) # Looking for http://schema.org/VideoObject From 68220649fa0b1c06c16a80ce51cc21f8d3264a4c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 19 Mar 2017 20:42:17 +0800 
Subject: [PATCH 0503/1696] [ChangeLog] Update after #12099 --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index eeb5813c5..d70637b69 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors ++ [bostonglobe] Add extractor for bostonglobe.com (#12099) * [openload] Fix extraction (#10408) From 45e6ad21b4f024c1721dc3dd2b53f15d7efa8aa6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Mar 2017 23:48:02 +0700 Subject: [PATCH 0504/1696] Credit @mrBliss for vtm (#11912) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 273a6a034..2d676b210 100644 --- a/AUTHORS +++ b/AUTHORS @@ -209,3 +209,4 @@ Olivier Bilodeau Lars Vierbergen Juanjo Benages Xiao Di Guan +Thomas Winant From 9487ce03e998337cbc69db250009e11b52c3b255 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Mar 2017 23:59:40 +0700 Subject: [PATCH 0505/1696] [YoutubeDL] Allow multiple input URLs to be used with stdout as output template --- youtube_dl/YoutubeDL.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 13a3a909e..cb502c26f 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1872,6 +1872,7 @@ class YoutubeDL(object): """Download a given list of URLs.""" outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) if (len(url_list) > 1 and + outtmpl != '-' and '%' not in outtmpl and self.params.get('max_downloads') != 1): raise SameFileError(outtmpl) From 0ecdd3adbd104786c901944a316b87f58056bcdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Mar 2017 00:03:58 +0700 Subject: [PATCH 0506/1696] [ChangeLog] Actualize --- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index d70637b69..5a316f0ac 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,15 @@ version <unreleased> +Core ++ [YoutubeDL] Allow multiple input URLs to be used with stdout (-) as + output template ++ [adobepass] Detect and output error on authz token extraction (#12472) + Extractors + [bostonglobe] Add extractor for bostonglobe.com (#12099) ++ [toongoggles] Add support for toongoggles.com (#12171) ++ [medialaan] Add support for Medialaan sites (#9974, #11912) ++ [discoverynetworks] Add support for more domains and bypass geo restiction * [openload] Fix extraction (#10408) From 0e9a73e6120965fc2c2a1a2a1a30f7d38af4c73a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Mar 2017 00:07:57 +0700 Subject: [PATCH 0507/1696] release 2017.03.20 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 5 ++++- youtube_dl/version.py | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 0e94b6cde..4273fedbf 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.20*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.20** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.03.16 +[debug] youtube-dl version 2017.03.20 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 5a316f0ac..bbbf3c34d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.03.20 Core + [YoutubeDL] Allow multiple input URLs to be used with stdout (-) as diff --git a/docs/supportedsites.md b/docs/supportedsites.md index cc0309f97..6a7be28cb 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -108,6 +108,7 @@ - **blinkx** - **Bloomberg** - **BokeCC** + - **BostonGlobe** - **Bpb**: Bundeszentrale für politische Bildung - **BR**: Bayerischer Rundfunk Mediathek - **BravoTV** @@ -209,6 +210,7 @@ - **Discovery** - **DiscoveryGo** - **DiscoveryGoPlaylist** + - **DiscoveryNetworksDe** - **Disney** - **Dotsub** - **DouyuTV**: 斗鱼 @@ -425,6 +427,7 @@ - **MatchTV** - **MDR**: MDR.DE and KiKA - **media.ccc.de** + - **Medialaan** - **Meipai**: 美拍 - **MelonVOD** - **META** @@ -777,12 +780,12 @@ - **ThisAV** - **ThisOldHouse** - **tinypic**: tinypic.com videos - - **tlc.de** - **TMZ** - **TMZArticle** - **TNAFlix** - **TNAFlixNetworkEmbed** - **toggle** + - **ToonGoggles** - **Tosh**: Tosh.0 - **tou.tv** - **Toypics**: Toypics user profile diff --git a/youtube_dl/version.py b/youtube_dl/version.py index f38f130bf..a65f2e741 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.03.16' +__version__ = '2017.03.20' From 957f453429d584615ac4d2277caeb0d75d0fe1d9 Mon Sep 17 00:00:00 2001 From: Vijay Singh <sudovijay@users.noreply.github.com> Date: Mon, 20 Mar 2017 09:22:32 +0530 Subject: [PATCH 0508/1696] [Openload.co] Fixed Extraction They did it again. just a minor change though. 
here's quick fix --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 435aec28e..58ffde541 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -110,7 +110,7 @@ class OpenloadIE(InfoExtractor): elif H % 2 == 0 and H != 0 and ord(v[H - 1]) < 60: i = int(C, 10) h += 1 - index = H % 12 + index = H % 7 A = hashMap[index] i ^= 213 From 8a8cc339b6b5189b3c1fdb15ef7224c035b21a6b Mon Sep 17 00:00:00 2001 From: John Hawkinson <jhawk@mit.edu> Date: Mon, 20 Mar 2017 11:35:13 -0400 Subject: [PATCH 0509/1696] [senateisvp] Allow https URL scheme for embeds --- youtube_dl/extractor/senateisvp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py index 387a4f7f6..db5ef8b57 100644 --- a/youtube_dl/extractor/senateisvp.py +++ b/youtube_dl/extractor/senateisvp.py @@ -89,7 +89,7 @@ class SenateISVPIE(InfoExtractor): @staticmethod def _search_iframe_url(webpage): mobj = re.search( - r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]", + r"<iframe[^>]+src=['\"](?P<url>https?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]", webpage) if mobj: return mobj.group('url') From 97952bdb78854bf09c688eb535dc7b67265934c1 Mon Sep 17 00:00:00 2001 From: John Hawkinson <jhawk@mit.edu> Date: Tue, 21 Mar 2017 13:12:14 -0400 Subject: [PATCH 0510/1696] [generic] Add test for Senate ISVP iframe embed --- youtube_dl/extractor/generic.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a71d6bac0..cb6308d29 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1542,6 +1542,17 @@ class GenericIE(InfoExtractor): 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm', 'only_matching': True, }, + { + # Senate ISVP iframe https + 'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security', + 'md5': 'fb8c70b0b515e5037981a2492099aab8', + 'info_dict': { + 'id': 'govtaff020316', + 'ext': 'mp4', + 'title': 'Integrated Senate Video Player', + }, + 'add_ie': [SenateISVPIE.ie_key()], + }, # { # # TODO: find another test # # http://schema.org/VideoObject From 21fbf0f955f584ad2d02608850a69a2fd74b65a6 Mon Sep 17 00:00:00 2001 From: Throaway <Throaway@null.com> Date: Mon, 20 Mar 2017 16:29:39 -0700 Subject: [PATCH 0511/1696] [pornhub] Decode obfuscated video URL (closes #12470) --- youtube_dl/extractor/pornhub.py | 37 ++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 9b413590a..eb316ad14 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -1,7 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals +import functools import itertools +import operator # import os import re @@ -129,9 +131,38 @@ class PornHubIE(InfoExtractor): tv_webpage = dl_webpage('tv') - video_url = self._search_regex( - r'<video[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//.+?)\1', tv_webpage, - 'video url', group='url') + encoded_url = self._search_regex(r'(var.*mediastring.*)</script>', + tv_webpage, 'encoded url') + assignments = encoded_url.split(";") + js_vars = {} + + def parse_js_value(inp): + inp = re.sub(r'/\*[^*]*\*/', "", inp) + + if "+" in inp: + 
inps = inp.split("+") + return functools.reduce(operator.concat, map(parse_js_value, inps)) + + inp = inp.strip() + if inp in js_vars: + return js_vars[inp] + + # Hope it's a string! + assert inp.startswith('"') and inp.endswith('"') + return inp[1:-1] + + for assn in assignments: + assn = assn.strip() + if len(assn) == 0: + continue + + assert assn.startswith("var ") + assn = assn[4:] + vname, value = assn.split("=", 1) + + js_vars[vname] = parse_js_value(value) + + video_url = js_vars["mediastring"] title = self._search_regex( r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None) From e1e35d1ac66ab99202e8265ac811906de2aa87dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Mar 2017 01:59:27 +0700 Subject: [PATCH 0512/1696] [pornhub] Improve extraction and style (closes #12515) --- youtube_dl/extractor/pornhub.py | 35 ++++++++++++++------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index eb316ad14..b25f1f193 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -20,6 +20,7 @@ from ..utils import ( js_to_json, orderedSet, # sanitized_Request, + remove_quotes, str_to_int, ) # from ..aes import ( @@ -131,38 +132,32 @@ class PornHubIE(InfoExtractor): tv_webpage = dl_webpage('tv') - encoded_url = self._search_regex(r'(var.*mediastring.*)</script>', - tv_webpage, 'encoded url') - assignments = encoded_url.split(";") + assignments = self._search_regex( + r'(var.+?mediastring.+?)</script>', tv_webpage, + 'encoded url').split(';') + js_vars = {} def parse_js_value(inp): - inp = re.sub(r'/\*[^*]*\*/', "", inp) - - if "+" in inp: - inps = inp.split("+") - return functools.reduce(operator.concat, map(parse_js_value, inps)) - + inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp) + if '+' in inp: + inps = inp.split('+') + return functools.reduce( + operator.concat, map(parse_js_value, inps)) inp = inp.strip() if inp in js_vars: return js_vars[inp] - - # Hope it's a string! 
- assert inp.startswith('"') and inp.endswith('"') - return inp[1:-1] + return remove_quotes(inp) for assn in assignments: assn = assn.strip() - if len(assn) == 0: + if not assn: continue - - assert assn.startswith("var ") - assn = assn[4:] - vname, value = assn.split("=", 1) - + assn = re.sub(r'var\s+', '', assn) + vname, value = assn.split('=', 1) js_vars[vname] = parse_js_value(value) - video_url = js_vars["mediastring"] + video_url = js_vars['mediastring'] title = self._search_regex( r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None) From 8e5a7c5e67a8fad446d22a7619cd6a09823a05e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Mar 2017 02:28:04 +0700 Subject: [PATCH 0513/1696] [pluralsight] Omit module title from video title (closes #12506) --- youtube_dl/extractor/pluralsight.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index e0cbd045e..0c6e036ca 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -40,7 +40,7 @@ class PluralsightIE(PluralsightBaseIE): 'info_dict': { 'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04', 'ext': 'mp4', - 'title': 'Management of SQL Server - Demo Monitoring', + 'title': 'Demo Monitoring', 'duration': 338, }, 'skip': 'Requires pluralsight account credentials', @@ -187,7 +187,7 @@ class PluralsightIE(PluralsightBaseIE): if not clip: raise ExtractorError('Unable to resolve clip') - title = '%s - %s' % (module['title'], clip['title']) + title = clip['title'] QUALITIES = { 'low': {'width': 640, 'height': 480}, From e8686e51d77607347802f82c57278e7d675d022c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Mar 2017 02:35:09 +0700 Subject: [PATCH 0514/1696] [ChangeLog] Actualize --- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index bbbf3c34d..82e82b588 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +version <unreleased> + +Extractors +- [pluralsight] Omit module title from video title (#12506) +* [pornhub] Decode obfuscated video URL (#12470, #12515) +* [senateisvp] Allow https URL scheme for embeds (#12512) + + version 2017.03.20 Core From 093dad9e256e3237dfad3c57a19ae10ddadcbf9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Mar 2017 02:36:50 +0700 Subject: [PATCH 0515/1696] release 2017.03.22 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 4273fedbf..31ba1de3d 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.20*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.20** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.22** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.03.20 +[debug] youtube-dl version 2017.03.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 82e82b588..dc5acbca9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.03.22 Extractors - [pluralsight] Omit module title from video title (#12506) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a65f2e741..4d722873d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.03.20' +__version__ = '2017.03.22' From c183e14f89078593ab47f06e5076f00bee3c9dd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Mar 2017 22:26:59 +0700 Subject: [PATCH 0516/1696] [viu] Relax _VALID_URL (closes #12529) --- youtube_dl/extractor/viu.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viu.py b/youtube_dl/extractor/viu.py index 3fd889c8e..db6a65d2e 100644 --- a/youtube_dl/extractor/viu.py +++ b/youtube_dl/extractor/viu.py @@ -44,7 +44,7 @@ class ViuBaseIE(InfoExtractor): class ViuIE(ViuBaseIE): - _VALID_URL = r'(?:viu:|https?://www\.viu\.com/[a-z]{2}/media/)(?P<id>\d+)' + _VALID_URL = r'(?:viu:|https?://[^/]+\.viu\.com/[a-z]{2}/media/)(?P<id>\d+)' _TESTS = [{ 'url': 'https://www.viu.com/en/media/1116705532?containerId=playlist-22168059', 'info_dict': { @@ -69,6 +69,9 @@ class ViuIE(ViuBaseIE): 'skip_download': 'm3u8 download', }, 'skip': 'Geo-restricted to Indonesia', + }, { + 'url': 'https://india.viu.com/en/media/1126286865', + 'only_matching': True, }] def _real_extract(self, url): From 391d076d7cf037b1d7849ea7cbbdd04950c46f96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Mar 2017 23:22:14 +0700 Subject: [PATCH 0517/1696] [condenast] Fix extraction and style (closes #12526) --- youtube_dl/extractor/condenast.py | 43 +++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 8d8f60598..d3463b874 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -9,13 +9,14 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( - orderedSet, - remove_end, - extract_attributes, - mimetype2ext, determine_ext, + extract_attributes, int_or_none, + js_to_json, + mimetype2ext, + orderedSet, parse_iso8601, + remove_end, ) @@ -66,6 +67,16 @@ class CondeNastIE(InfoExtractor): 'upload_date': '20130314', 'timestamp': 1363219200, } + }, { + 'url': 'http://video.gq.com/watch/the-closer-with-keith-olbermann-the-only-true-surprise-trump-s-an-idiot?c=series', + 'info_dict': { + 'id': '58d1865bfd2e6126e2000015', + 'ext': 'mp4', + 'title': 
'The Only True Surprise? Trump’s an Idiot', + 'uploader': 'gq', + 'upload_date': '20170321', + 'timestamp': 1490126427, + }, }, { # JS embed 'url': 'http://player.cnevids.com/embedjs/55f9cf8b61646d1acf00000c/5511d76261646d5566020000.js', @@ -114,26 +125,33 @@ class CondeNastIE(InfoExtractor): }) video_id = query['videoId'] video_info = None - info_page = self._download_webpage( + info_page = self._download_json( 'http://player.cnevids.com/player/video.js', - video_id, 'Downloading video info', query=query, fatal=False) + video_id, 'Downloading video info', fatal=False, query=query) if info_page: - video_info = self._parse_json(self._search_regex( - r'loadCallback\(({.+})\)', info_page, 'video info'), video_id)['video'] - else: + video_info = info_page.get('video') + if not video_info: info_page = self._download_webpage( 'http://player.cnevids.com/player/loader.js', video_id, 'Downloading loader info', query=query) - video_info = self._parse_json(self._search_regex( - r'var\s+video\s*=\s*({.+?});', info_page, 'video info'), video_id) + video_info = self._parse_json( + self._search_regex( + r'(?s)var\s+config\s*=\s*({.+?});', info_page, 'config'), + video_id, transform_source=js_to_json)['video'] + title = video_info['title'] formats = [] - for fdata in video_info.get('sources', [{}])[0]: + for fdata in video_info['sources']: src = fdata.get('src') if not src: continue ext = mimetype2ext(fdata.get('type')) or determine_ext(src) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + continue quality = fdata.get('quality') formats.append({ 'format_id': ext + ('-%s' % quality if quality else ''), @@ -169,7 +187,6 @@ class CondeNastIE(InfoExtractor): path=remove_end(parsed_url.path, '.js').replace('/embedjs/', '/embed/'))) url_type = 'embed' - self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site]) webpage = self._download_webpage(url, item_id) if url_type == 'series': From ca5ed022e962ecd6992c145ac7bc00b5963e5d69 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 22 Mar 2017 17:28:24 +0100 Subject: [PATCH 0518/1696] [hbo] add support for free episode urls and new formats extraction(closes #12519) --- youtube_dl/extractor/hbo.py | 45 ++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py index 8116ad9bd..931f71a5a 100644 --- a/youtube_dl/extractor/hbo.py +++ b/youtube_dl/extractor/hbo.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( xpath_text, xpath_element, @@ -14,14 +15,26 @@ from ..utils import ( class HBOBaseIE(InfoExtractor): _FORMATS_INFO = { + 'pro7': { + 'width': 1280, + 'height': 720, + }, '1920': { 'width': 1280, 'height': 720, }, + 'pro6': { + 'width': 768, + 'height': 432, + }, '640': { 'width': 768, 'height': 432, }, + 'pro5': { + 'width': 640, + 'height': 360, + }, 'highwifi': { 'width': 640, 'height': 360, @@ -78,6 +91,17 @@ class HBOBaseIE(InfoExtractor): formats.extend(self._extract_m3u8_formats( video_url.replace('.tar', '/base_index_w8.m3u8'), video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + elif source.tag == 'hls': + # #EXT-X-BYTERANGE is not supported by native hls downloader + # and ffmpeg (#10955) + # formats.extend(self._extract_m3u8_formats( + # video_url.replace('.tar', '/base_index.m3u8'), + # 
video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + continue + elif source.tag == 'dash': + formats.extend(self._extract_mpd_formats( + video_url.replace('.tar', '/manifest.mpd'), + video_id, mpd_id='dash', fatal=False)) else: format_info = self._FORMATS_INFO.get(source.tag, {}) formats.append({ @@ -112,10 +136,11 @@ class HBOBaseIE(InfoExtractor): class HBOIE(HBOBaseIE): + IE_NAME = 'hbo' _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839', - 'md5': '1c33253f0c7782142c993c0ba62a8753', + 'md5': '2c6a6bc1222c7e91cb3334dad1746e5a', 'info_dict': { 'id': '1437839', 'ext': 'mp4', @@ -131,11 +156,12 @@ class HBOIE(HBOBaseIE): class HBOEpisodeIE(HBOBaseIE): - _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P<id>[0-9a-z-]+)\.html' + IE_NAME = 'hbo:episode' + _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?P<path>(?!video)(?:(?:[^/]+/)+video|watch-free-episodes)/(?P<id>[0-9a-z-]+))(?:\.html)?' _TESTS = [{ 'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true', - 'md5': '689132b253cc0ab7434237fc3a293210', + 'md5': '61ead79b9c0dfa8d3d4b07ef4ac556fb', 'info_dict': { 'id': '1439518', 'display_id': 'ep-52-inside-the-episode', @@ -147,16 +173,19 @@ class HBOEpisodeIE(HBOBaseIE): }, { 'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true', 'only_matching': True, + }, { + 'url': 'http://www.hbo.com/watch-free-episodes/last-week-tonight-with-john-oliver', + 'only_matching': True, }] def _real_extract(self, url): - display_id = self._match_id(url) + path, display_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, display_id) + content = self._download_json( + 'http://www.hbo.com/api/content/' + path, display_id)['content'] - video_id = self._search_regex( - r'(?P<q1>[\'"])videoId(?P=q1)\s*:\s*(?P<q2>[\'"])(?P<video_id>\d+)(?P=q2)', - webpage, 'video ID', group='video_id') + video_id = compat_str((content.get('parsed', {}).get( + 'common:FullBleedVideo', {}) or content['selectedEpisode'])['videoId']) info_dict = self._extract_from_id(video_id) info_dict['display_id'] = display_id From 579c99a284481243f30e80151c90a753f613778d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Mar 2017 23:48:06 +0700 Subject: [PATCH 0519/1696] [cloudy] Fix extraction (closes #12525) --- youtube_dl/extractor/cloudy.py | 107 ++++++++++----------------------- 1 file changed, 33 insertions(+), 74 deletions(-) diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py index ae5ba0015..9bc8dbea4 100644 --- a/youtube_dl/extractor/cloudy.py +++ b/youtube_dl/extractor/cloudy.py @@ -1,97 +1,56 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_HTTPError, -) from ..utils import ( - ExtractorError, - HEADRequest, - remove_end, + str_to_int, + unified_strdate, ) class CloudyIE(InfoExtractor): _IE_DESC = 'cloudy.ec' - _VALID_URL = r'''(?x) - https?://(?:www\.)?cloudy\.ec/ - (?:v/|embed\.php\?id=) - (?P<id>[A-Za-z0-9]+) - ''' - _EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s' - _API_URL = 'http://www.cloudy.ec/api/player.api.php' - _MAX_TRIES = 2 - _TEST = { + _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)' + _TESTS = [{ 'url': 
'https://www.cloudy.ec/v/af511e2527aac', - 'md5': '5cb253ace826a42f35b4740539bedf07', + 'md5': '29832b05028ead1b58be86bf319397ca', 'info_dict': { 'id': 'af511e2527aac', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Funny Cats and Animals Compilation june 2013', + 'upload_date': '20130913', + 'view_count': int, } - } - - def _extract_video(self, video_id, file_key, error_url=None, try_num=0): - - if try_num > self._MAX_TRIES - 1: - raise ExtractorError('Unable to extract video URL', expected=True) - - form = { - 'file': video_id, - 'key': file_key, - } - - if error_url: - form.update({ - 'numOfErrors': try_num, - 'errorCode': '404', - 'errorUrl': error_url, - }) + }, { + 'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac', + 'only_matching': True, + }] - player_data = self._download_webpage( - self._API_URL, video_id, 'Downloading player data', query=form) - data = compat_parse_qs(player_data) - - try_num += 1 - - if 'error' in data: - raise ExtractorError( - '%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])), - expected=True) + def _real_extract(self, url): + video_id = self._match_id(url) - title = data.get('title', [None])[0] - if title: - title = remove_end(title, '&asdasdas').strip() + webpage = self._download_webpage( + 'http://www.cloudy.ec/embed.php?id=%s' % video_id, video_id) - video_url = data.get('url', [None])[0] + info = self._parse_html5_media_entries(url, webpage, video_id)[0] - if video_url: - try: - self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL') - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]: - self.report_warning('Invalid video URL, requesting another', video_id) - return self._extract_video(video_id, file_key, video_url, try_num) + webpage = self._download_webpage( + 'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False) - return { - 'id': video_id, - 'url': video_url, - 'title': title, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + if webpage: + info.update({ + 'title': self._search_regex( + r'<h\d[^>]*>([^<]+)<', webpage, 'title'), + 'upload_date': unified_strdate(self._search_regex( + r'>Published at (\d{4}-\d{1,2}-\d{1,2})', webpage, + 'upload date', fatal=False)), + 'view_count': str_to_int(self._search_regex( + r'([\d,.]+) views<', webpage, 'view count', fatal=False)), + }) - url = self._EMBED_URL % video_id - webpage = self._download_webpage(url, video_id) + if not info.get('title'): + info['title'] = video_id - file_key = self._search_regex( - [r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'], - webpage, 'file_key') + info['id'] = video_id - return self._extract_video(video_id, file_key) + return info From b0f7f21cb92ca3af1795f68737ffa25196968dc6 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Mar 2017 09:22:17 +0100 Subject: [PATCH 0520/1696] [channel9] fix extraction(closes #11323) --- youtube_dl/extractor/channel9.py | 345 ++++++++++++------------------- 1 file changed, 127 insertions(+), 218 deletions(-) diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index 865dbcaba..b1cb58530 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -5,8 +5,10 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, - parse_filesize, - qualities, + unescapeHTML, + int_or_none, + parse_iso8601, + clean_html, ) @@ -20,46 +22,50 @@ class Channel9IE(InfoExtractor): ''' 
IE_DESC = 'Channel 9' IE_NAME = 'channel9' - _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)' + _VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)' _TESTS = [{ 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', - 'md5': 'bbd75296ba47916b754e73c3a4bbdf10', + 'md5': '32083d4eaf1946db6d454313f44510ca', 'info_dict': { - 'id': 'Events/TechEd/Australia/2013/KOS002', - 'ext': 'mp4', + 'id': '6c413323-383a-49dc-88f9-a22800cab024', + 'ext': 'wmv', 'title': 'Developer Kick-Off Session: Stuff We Love', - 'description': 'md5:c08d72240b7c87fcecafe2692f80e35f', + 'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731', 'duration': 4576, - 'thumbnail': r're:http://.*\.jpg', + 'thumbnail': r're:https?://.*\.jpg', + 'timestamp': 1377717420, + 'upload_date': '20130828', 'session_code': 'KOS002', - 'session_day': 'Day 1', 'session_room': 'Arena 1A', - 'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', - 'Mads Kristensen'], + 'session_speakers': ['Andrew Coates', 'Brady Gaster', 'Mads Kristensen', 'Ed Blankenship', 'Patrick Klug'], }, }, { 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', - 'md5': 'b43ee4529d111bc37ba7ee4f34813e68', + 'md5': 'dcf983ee6acd2088e7188c3cf79b46bc', 'info_dict': { - 'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing', - 'ext': 'mp4', + 'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024', + 'ext': 'wmv', 'title': 'Self-service BI with Power BI - nuclear testing', - 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b', + 'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54', 'duration': 1540, - 'thumbnail': r're:http://.*\.jpg', + 'thumbnail': r're:https?://.*\.jpg', + 'timestamp': 1386381991, + 'upload_date': '20131207', 'authors': ['Mike Wilmot'], }, }, { # low quality mp4 is best 'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', 'info_dict': { - 'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', + 'id': '33ad69d2-6a4e-4172-83a1-a523013dec76', 'ext': 'mp4', 'title': 'Ranges for the Standard Library', - 'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d', + 'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372', 'duration': 5646, - 'thumbnail': r're:http://.*\.jpg', + 'thumbnail': r're:https?://.*\.jpg', + 'upload_date': '20150930', + 'timestamp': 1443640735, }, 'params': { 'skip_download': True, @@ -70,7 +76,7 @@ class Channel9IE(InfoExtractor): 'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b', 'title': 'Channel 9', }, - 'playlist_count': 2, + 'playlist_mincount': 100, }, { 'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS', 'only_matching': True, @@ -81,189 +87,6 @@ class Channel9IE(InfoExtractor): _RSS_URL = 'http://channel9.msdn.com/%s/RSS' - def _formats_from_html(self, html): - FORMAT_REGEX = r''' - (?x) - <a\s+href="(?P<url>[^"]+)">(?P<quality>[^<]+)</a>\s* - <span\s+class="usage">\((?P<note>[^\)]+)\)</span>\s* - (?:<div\s+class="popup\s+rounded">\s* - <h3>File\s+size</h3>\s*(?P<filesize>.*?)\s* - </div>)? 
# File size part may be missing - ''' - quality = qualities(( - 'MP3', 'MP4', - 'Low Quality WMV', 'Low Quality MP4', - 'Mid Quality WMV', 'Mid Quality MP4', - 'High Quality WMV', 'High Quality MP4')) - formats = [{ - 'url': x.group('url'), - 'format_id': x.group('quality'), - 'format_note': x.group('note'), - 'format': '%s (%s)' % (x.group('quality'), x.group('note')), - 'filesize_approx': parse_filesize(x.group('filesize')), - 'quality': quality(x.group('quality')), - 'vcodec': 'none' if x.group('note') == 'Audio only' else None, - } for x in list(re.finditer(FORMAT_REGEX, html))] - - self._sort_formats(formats) - - return formats - - def _extract_title(self, html): - title = self._html_search_meta('title', html, 'title') - if title is None: - title = self._og_search_title(html) - TITLE_SUFFIX = ' (Channel 9)' - if title is not None and title.endswith(TITLE_SUFFIX): - title = title[:-len(TITLE_SUFFIX)] - return title - - def _extract_description(self, html): - DESCRIPTION_REGEX = r'''(?sx) - <div\s+class="entry-content">\s* - <div\s+id="entry-body">\s* - (?P<description>.+?)\s* - </div>\s* - </div> - ''' - m = re.search(DESCRIPTION_REGEX, html) - if m is not None: - return m.group('description') - return self._html_search_meta('description', html, 'description') - - def _extract_duration(self, html): - m = re.search(r'"length": *"(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html) - return ((int(m.group('hours')) * 60 * 60) + (int(m.group('minutes')) * 60) + int(m.group('seconds'))) if m else None - - def _extract_slides(self, html): - m = re.search(r'<a href="(?P<slidesurl>[^"]+)" class="slides">Slides</a>', html) - return m.group('slidesurl') if m is not None else None - - def _extract_zip(self, html): - m = re.search(r'<a href="(?P<zipurl>[^"]+)" class="zip">Zip</a>', html) - return m.group('zipurl') if m is not None else None - - def _extract_avg_rating(self, html): - m = re.search(r'<p class="avg-rating">Avg Rating: <span>(?P<avgrating>[^<]+)</span></p>', html) - return float(m.group('avgrating')) if m is not None else 0 - - def _extract_rating_count(self, html): - m = re.search(r'<div class="rating-count">\((?P<ratingcount>[^<]+)\)</div>', html) - return int(self._fix_count(m.group('ratingcount'))) if m is not None else 0 - - def _extract_view_count(self, html): - m = re.search(r'<li class="views">\s*<span class="count">(?P<viewcount>[^<]+)</span> Views\s*</li>', html) - return int(self._fix_count(m.group('viewcount'))) if m is not None else 0 - - def _extract_comment_count(self, html): - m = re.search(r'<li class="comments">\s*<a href="#comments">\s*<span class="count">(?P<commentcount>[^<]+)</span> Comments\s*</a>\s*</li>', html) - return int(self._fix_count(m.group('commentcount'))) if m is not None else 0 - - def _fix_count(self, count): - return int(str(count).replace(',', '')) if count is not None else None - - def _extract_authors(self, html): - m = re.search(r'(?s)<li class="author">(.*?)</li>', html) - if m is None: - return None - return re.findall(r'<a href="/Niners/[^"]+">([^<]+)</a>', m.group(1)) - - def _extract_session_code(self, html): - m = re.search(r'<li class="code">\s*(?P<code>.+?)\s*</li>', html) - return m.group('code') if m is not None else None - - def _extract_session_day(self, html): - m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html) - return m.group('day').strip() if m is not None else None - - def _extract_session_room(self, html): - m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', 
html) - return m.group('room') if m is not None else None - - def _extract_session_speakers(self, html): - return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html) - - def _extract_content(self, html, content_path): - # Look for downloadable content - formats = self._formats_from_html(html) - slides = self._extract_slides(html) - zip_ = self._extract_zip(html) - - # Nothing to download - if len(formats) == 0 and slides is None and zip_ is None: - self._downloader.report_warning('None of recording, slides or zip are available for %s' % content_path) - return - - # Extract meta - title = self._extract_title(html) - description = self._extract_description(html) - thumbnail = self._og_search_thumbnail(html) - duration = self._extract_duration(html) - avg_rating = self._extract_avg_rating(html) - rating_count = self._extract_rating_count(html) - view_count = self._extract_view_count(html) - comment_count = self._extract_comment_count(html) - - common = { - '_type': 'video', - 'id': content_path, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'avg_rating': avg_rating, - 'rating_count': rating_count, - 'view_count': view_count, - 'comment_count': comment_count, - } - - result = [] - - if slides is not None: - d = common.copy() - d.update({'title': title + '-Slides', 'url': slides}) - result.append(d) - - if zip_ is not None: - d = common.copy() - d.update({'title': title + '-Zip', 'url': zip_}) - result.append(d) - - if len(formats) > 0: - d = common.copy() - d.update({'title': title, 'formats': formats}) - result.append(d) - - return result - - def _extract_entry_item(self, html, content_path): - contents = self._extract_content(html, content_path) - if contents is None: - return contents - - if len(contents) > 1: - raise ExtractorError('Got more than one entry') - result = contents[0] - result['authors'] = self._extract_authors(html) - - return result - - def _extract_session(self, html, content_path): - contents = self._extract_content(html, content_path) - if contents is None: - return contents - - session_meta = { - 'session_code': self._extract_session_code(html), - 'session_day': self._extract_session_day(html), - 'session_room': self._extract_session_room(html), - 'session_speakers': self._extract_session_speakers(html), - } - - for content in contents: - content.update(session_meta) - - return self.playlist_result(contents) - def _extract_list(self, video_id, rss_url=None): if not rss_url: rss_url = self._RSS_URL % video_id @@ -274,9 +97,7 @@ class Channel9IE(InfoExtractor): return self.playlist_result(entries, video_id, title_text) def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - content_path = mobj.group('contentpath') - rss = mobj.group('rss') + content_path, rss = re.match(self._VALID_URL, url).groups() if rss: return self._extract_list(content_path, url) @@ -284,17 +105,105 @@ class Channel9IE(InfoExtractor): webpage = self._download_webpage( url, content_path, 'Downloading web page') - page_type = self._search_regex( - r'<meta[^>]+name=(["\'])WT\.entryid\1[^>]+content=(["\'])(?P<pagetype>[^:]+).+?\2', - webpage, 'page type', default=None, group='pagetype') - if page_type: - if page_type == 'Entry': # Any 'item'-like page, may contain downloadable content - return self._extract_entry_item(webpage, content_path) - elif page_type == 'Session': # Event session page, may contain downloadable content - return self._extract_session(webpage, content_path) - elif page_type == 'Event': - return 
self._extract_list(content_path) + episode_data = self._search_regex( + r"data-episode='([^']+)'", webpage, 'episode data', default=None) + if episode_data: + episode_data = self._parse_json(unescapeHTML( + episode_data), content_path) + content_id = episode_data['contentId'] + is_session = '/Sessions(' in episode_data['api'] + content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + if is_session: + content_url += '?$expand=Speakers' + else: + content_url += '?$expand=Authors' + content_data = self._download_json(content_url, content_id) + title = content_data['Title'] + + formats = [] + qualities = [ + 'VideoMP4Low', + 'VideoWMV', + 'VideoMP4Medium', + 'VideoMP4High', + 'VideoWMVHQ', + ] + for q in qualities: + q_url = content_data.get(q) + if not q_url: + continue + formats.append({ + 'format_id': q, + 'url': q_url, + }) + slides = content_data.get('Slides') + zip_file = content_data.get('ZipFile') + + if not formats and not slides and not zip_file: + raise ExtractorError( + 'None of recording, slides or zip are available for %s' % content_path) + + subtitles = {} + for caption in content_data.get('Captions', []): + caption_url = caption.get('Url') + if not caption_url: + continue + subtitles.setdefault(caption.get('Language', 'en'), []).append({ + 'url': caption_url, + 'ext': 'vtt', + }) + + common = { + 'id': content_id, + 'title': title, + 'description': clean_html(content_data.get('Description') or content_data.get('Body')), + 'thumbnail': content_data.get('Thumbnail') or content_data.get('VideoPlayerPreviewImage'), + 'duration': int_or_none(content_data.get('MediaLengthInSeconds')), + 'timestamp': parse_iso8601(content_data.get('PublishedDate')), + 'avg_rating': int_or_none(content_data.get('Rating')), + 'rating_count': int_or_none(content_data.get('RatingCount')), + 'view_count': int_or_none(content_data.get('Views')), + 'comment_count': int_or_none(content_data.get('CommentCount')), + 'subtitles': subtitles, + } + if is_session: + speakers = [] + for s in content_data.get('Speakers', []): + speaker_name = s.get('FullName') + if not speaker_name: + continue + speakers.append(speaker_name) + + common.update({ + 'session_code': content_data.get('Code'), + 'session_room': content_data.get('Room'), + 'session_speakers': speakers, + }) else: - raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True) - else: # Assuming list + authors = [] + for a in content_data.get('Authors', []): + author_name = a.get('DisplayName') + if not author_name: + continue + authors.append(author_name) + common['authors'] = authors + + contents = [] + + if slides: + d = common.copy() + d.update({'title': title + '-Slides', 'url': slides}) + contents.append(d) + + if zip_file: + d = common.copy() + d.update({'title': title + '-Zip', 'url': zip_file}) + contents.append(d) + + if formats: + d = common.copy() + d.update({'title': title, 'formats': formats}) + contents.append(d) + return self.playlist_result(contents) + else: return self._extract_list(content_path) From 52d5ecabd518db46fc02b8624b2ad04ba7cf2114 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Mar 2017 13:48:32 +0100 Subject: [PATCH 0521/1696] [bellmedia] add support for etalk.ca(closes #12447) --- youtube_dl/extractor/bellmedia.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bellmedia.py b/youtube_dl/extractor/bellmedia.py index 1f5b6ed92..8820a3914 100644 --- a/youtube_dl/extractor/bellmedia.py +++ 
b/youtube_dl/extractor/bellmedia.py @@ -21,10 +21,11 @@ class BellMediaIE(InfoExtractor): animalplanet| bravo| mtv| - space + space| + etalk )\.ca| much\.com - )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})''' + )/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})''' _TESTS = [{ 'url': 'http://www.ctv.ca/video/player?vid=706966', 'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0', @@ -58,6 +59,9 @@ class BellMediaIE(InfoExtractor): }, { 'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430', 'only_matching': True, + }, { + 'url': 'http://www.etalk.ca/video?videoid=663455', + 'only_matching': True, }] _DOMAINS = { 'thecomedynetwork': 'comedy', @@ -65,6 +69,7 @@ class BellMediaIE(InfoExtractor): 'sciencechannel': 'discsci', 'investigationdiscovery': 'invdisc', 'animalplanet': 'aniplan', + 'etalk': 'ctv', } def _real_extract(self, url): From d0572557c2a88e34d85715af4271e8b5decbdfdb Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Mar 2017 13:52:07 +0100 Subject: [PATCH 0522/1696] [ninecninemedia] remove mp4 url extraction request --- youtube_dl/extractor/ninecninemedia.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/youtube_dl/extractor/ninecninemedia.py b/youtube_dl/extractor/ninecninemedia.py index d9943fc2c..8961309fd 100644 --- a/youtube_dl/extractor/ninecninemedia.py +++ b/youtube_dl/extractor/ninecninemedia.py @@ -34,12 +34,6 @@ class NineCNineMediaStackIE(NineCNineMediaBaseIE): formats.extend(self._extract_f4m_formats( stack_base_url + 'f4m', stack_id, f4m_id='hds', fatal=False)) - mp4_url = self._download_webpage(stack_base_url + 'pd', stack_id, fatal=False) - if mp4_url: - formats.append({ - 'url': mp4_url, - 'format_id': 'mp4', - }) self._sort_formats(formats) return { From a5d783f525a8d4b62777434607c7f1efc5c34ece Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Mar 2017 23:47:43 +0700 Subject: [PATCH 0523/1696] [channel9] Extract more formats --- youtube_dl/extractor/channel9.py | 76 +++++++++++++++++++++++++++----- 1 file changed, 65 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index b1cb58530..717e4eb3b 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -9,6 +9,7 @@ from ..utils import ( int_or_none, parse_iso8601, clean_html, + qualities, ) @@ -120,22 +121,75 @@ class Channel9IE(InfoExtractor): content_data = self._download_json(content_url, content_id) title = content_data['Title'] + QUALITIES = ( + 'mp3', + 'wmv', 'mp4', + 'wmv-low', 'mp4-low', + 'wmv-mid', 'mp4-mid', + 'wmv-high', 'mp4-high', + ) + + quality_key = qualities(QUALITIES) + + def quality(quality_id, format_url): + return (len(QUALITIES) if '_Source.' 
in format_url + else quality_key(quality_id)) + formats = [] - qualities = [ - 'VideoMP4Low', - 'VideoWMV', - 'VideoMP4Medium', - 'VideoMP4High', - 'VideoWMVHQ', - ] - for q in qualities: - q_url = content_data.get(q) - if not q_url: + urls = set() + + SITE_QUALITIES = { + 'MP3': 'mp3', + 'MP4': 'mp4', + 'Low Quality WMV': 'wmv-low', + 'Low Quality MP4': 'mp4-low', + 'Mid Quality WMV': 'wmv-mid', + 'Mid Quality MP4': 'mp4-mid', + 'High Quality WMV': 'wmv-high', + 'High Quality MP4': 'mp4-high', + } + + formats_select = self._search_regex( + r'(?s)<select[^>]+name=["\']format[^>]+>(.+?)</select', webpage, + 'formats select', default=None) + if formats_select: + for mobj in re.finditer( + r'<option\b[^>]+\bvalue=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<', + formats_select): + format_url = mobj.group('url') + if format_url in urls: + continue + urls.add(format_url) + format_id = mobj.group('format') + quality_id = SITE_QUALITIES.get(format_id, format_id) + formats.append({ + 'url': format_url, + 'format_id': quality_id, + 'quality': quality(quality_id, format_url), + 'vcodec': 'none' if quality_id == 'mp3' else None, + }) + + API_QUALITIES = { + 'VideoMP4Low': 'mp4-low', + 'VideoWMV': 'wmv-mid', + 'VideoMP4Medium': 'mp4-mid', + 'VideoMP4High': 'mp4-high', + 'VideoWMVHQ': 'wmv-hq', + } + + for format_id, q in API_QUALITIES.items(): + q_url = content_data.get(format_id) + if not q_url or q_url in urls: continue + urls.add(q_url) formats.append({ - 'format_id': q, 'url': q_url, + 'format_id': q, + 'quality': quality(q, q_url), }) + + self._sort_formats(formats) + slides = content_data.get('Slides') zip_file = content_data.get('ZipFile') From bea7af694748f3d731ab4340539251f2daf5cc10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Mar 2017 23:58:12 +0700 Subject: [PATCH 0524/1696] [channel9] Remove expired comment and sort imports --- youtube_dl/extractor/channel9.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index 717e4eb3b..e92894246 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -4,23 +4,16 @@ import re from .common import InfoExtractor from ..utils import ( + clean_html, ExtractorError, - unescapeHTML, int_or_none, parse_iso8601, - clean_html, qualities, + unescapeHTML, ) class Channel9IE(InfoExtractor): - ''' - Common extractor for channel9.msdn.com. - - The type of provided URL (video or playlist) is determined according to - meta Search.PageType from web page HTML rather than URL itself, as it is - not always possible to do. 
- ''' IE_DESC = 'Channel 9' IE_NAME = 'channel9' _VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)' From 7963b6cba8d020d5553bb98aee1d098870f78f42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Mar 2017 00:19:58 +0700 Subject: [PATCH 0525/1696] [ChangeLog] Actualize --- ChangeLog | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ChangeLog b/ChangeLog index dc5acbca9..2df64ea73 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +version <unreleased> + +Extractors +- [9c9media] Remove mp4 URL extraction request ++ [bellmedia] Add support for etalk.ca and space.ca (#12447) +* [channel9] Fix extraction (#11323) +* [cloudy] Fix extraction (#12525) ++ [hbo] Add support for free episode URLs and new formats extraction (#12519) +* [condenast] Fix extraction and style (#12526) +* [viu] Relax URL regular expression (#12529) + + version 2017.03.22 Extractors From a3ccd6bd11454b9760ef2c5f09f02f3afdb11af5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Mar 2017 00:24:23 +0700 Subject: [PATCH 0526/1696] release 2017.03.24 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 4 ++-- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 31ba1de3d..dfff41d2d 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.22** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.24** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.03.22 +[debug] youtube-dl version 2017.03.24 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 2df64ea73..78377dcb4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.03.24 Extractors - [9c9media] Remove mp4 URL extraction request diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 6a7be28cb..7c99ba3c2 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -312,8 +312,8 @@ - **GPUTechConf** - **Groupon** - **Hark** - - **HBO** - - **HBOEpisode** + - **hbo** + - **hbo:episode** - **HearThisAt** - **Heise** - **HellPorno** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4d722873d..13904c724 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.03.22' +__version__ = '2017.03.24' From 54b960f340ed5398136ef0206d17cafba2575678 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Mar 2017 00:45:24 +0700 Subject: [PATCH 0527/1696] [generic] Do not follow redirects to the same URL --- youtube_dl/extractor/generic.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index cb6308d29..da9d04efc 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2635,11 +2635,14 @@ class GenericIE(InfoExtractor): found = re.search(REDIRECT_REGEX, refresh_header) if found: new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1))) - self.report_following_redirect(new_url) - return { - '_type': 'url', - 'url': new_url, - } + if new_url != url: + self.report_following_redirect(new_url) + return { + '_type': 'url', + 'url': new_url, + } + else: + found = None if not found: # twitter:player is a https URL to iframe player that may or may not From d0ba55871e6754fdc8a6a28543581989ba3c50fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Mar 2017 01:17:17 +0700 Subject: [PATCH 0528/1696] [youtube] Improve _VALID_URLs (closes #12538) --- youtube_dl/extractor/youtube.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index caa048249..ca40de522 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -59,6 +59,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # If True it will raise an error if no login info is provided _LOGIN_REQUIRED = False + _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}' + def _set_language(self): self._set_cookie( '.youtube.com', 'PREF', 'f1=50000000&hl=en', @@ -265,9 +267,14 @@ class 
YoutubeIE(YoutubeBaseInfoExtractor): ) )? # all until now is optional -> you can pass the naked ID ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID - (?!.*?\blist=) # combined list/video URLs are handled by the playlist IE + (?!.*?\blist= + (?: + %(playlist_id)s| # combined list/video URLs are handled by the playlist IE + WL # WL are handled by the watch later IE + ) + ) (?(1).+)? # if we found the ID, everything can follow - $""" + $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE} _NEXT_URL_RE = r'[\?&]next_url=([^&]+)' _formats = { '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, @@ -924,6 +931,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'url': 'sJL6WA-aGkQ', 'only_matching': True, }, + { + 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM', + 'only_matching': True, + }, ] def __init__(self, *args, **kwargs): @@ -1864,8 +1875,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): ) .* | - ((?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}) - )""" + (%(playlist_id)s) + )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE} _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true' _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?' IE_NAME = 'youtube:playlist' From 31a1214076ff41efbaa7f7243565da830d1e2c7e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 25 Mar 2017 07:03:13 +0100 Subject: [PATCH 0529/1696] [franceculture] fix extraction(closes #12547) --- youtube_dl/extractor/franceculture.py | 31 ++++++++++++++++----------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py index b98da692c..df3d757f3 100644 --- a/youtube_dl/extractor/franceculture.py +++ b/youtube_dl/extractor/franceculture.py @@ -4,6 +4,8 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( determine_ext, + extract_attributes, + int_or_none, unified_strdate, ) @@ -19,6 +21,7 @@ class FranceCultureIE(InfoExtractor): 'title': 'Rendez-vous au pays des geeks', 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20140301', + 'timestamp': 1393642916, 'vcodec': 'none', } } @@ -28,30 +31,34 @@ class FranceCultureIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - video_url = self._search_regex( - r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<button[^>]+data-asset-source="([^"]+)"', - webpage, 'video path') + video_data = extract_attributes(self._search_regex( + r'(?s)<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>.*?(<button[^>]+data-asset-source="[^"]+"[^>]+>)', + webpage, 'video data')) - title = self._og_search_title(webpage) + video_url = video_data['data-asset-source'] + title = video_data.get('data-asset-title') or self._og_search_title(webpage) - upload_date = unified_strdate(self._search_regex( - '(?s)<div[^>]+class="date"[^>]*>.*?<span[^>]+class="inner"[^>]*>([^<]+)<', - webpage, 'upload date', fatal=False)) + description = self._html_search_regex( + r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>', + webpage, 'description', default=None) thumbnail = self._search_regex( - r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-dejavu-src="([^"]+)"', + 
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"', webpage, 'thumbnail', fatal=False) uploader = self._html_search_regex( - r'(?s)<div id="emission".*?<span class="author">(.*?)</span>', + r'(?s)<span class="author">(.*?)</span>', webpage, 'uploader', default=None) - vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None + ext = determine_ext(video_url.lower()) return { 'id': display_id, 'display_id': display_id, 'url': video_url, 'title': title, + 'description': description, 'thumbnail': thumbnail, - 'vcodec': vcodec, + 'ext': ext, + 'vcodec': 'none' if ext == 'mp3' else None, 'uploader': uploader, - 'upload_date': upload_date, + 'timestamp': int_or_none(video_data.get('data-asset-created-date')), + 'duration': int_or_none(video_data.get('data-duration')), } From 1088d76da6cbc83d64faca5a1a987944af04b0ce Mon Sep 17 00:00:00 2001 From: zurfyx <zurfyx@gmail.com> Date: Sat, 25 Mar 2017 00:45:32 +0100 Subject: [PATCH 0530/1696] [atresplayer] Fix login error detection --- youtube_dl/extractor/atresplayer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index e3c669830..23a536ff2 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -90,7 +90,8 @@ class AtresPlayerIE(InfoExtractor): request, None, 'Logging in as %s' % username) error = self._html_search_regex( - r'(?s)<ul class="list_error">(.+?)</ul>', response, 'error', default=None) + r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>', + response, 'error', default=None) if error: raise ExtractorError( 'Unable to login: %s' % error, expected=True) From 048086920bdb92cc5d63847e9e1d2fd645910363 Mon Sep 17 00:00:00 2001 From: zurfyx <zurfyx@gmail.com> Date: Sat, 25 Mar 2017 01:08:47 +0100 Subject: [PATCH 0531/1696] [atresplayer] Extract HD manifest --- youtube_dl/extractor/atresplayer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 23a536ff2..940c548f4 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -161,7 +161,8 @@ class AtresPlayerIE(InfoExtractor): # this videos are protected by DRM, the f4m downloader doesn't support them continue else: - f4m_url = video_url[:-9] + '/manifest.f4m' + video_url_hd = video_url.replace('free_es', 'es') + f4m_url = video_url_hd[:-9] + '/manifest.f4m' formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)) self._sort_formats(formats) From c7301e677bddb5d676ebf207a3ac485fce330057 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Mar 2017 18:03:46 +0700 Subject: [PATCH 0532/1696] [atresplayer] Extract DASH and ISM formats --- youtube_dl/extractor/atresplayer.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 940c548f4..ffac9df0e 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -160,10 +160,15 @@ class AtresPlayerIE(InfoExtractor): f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path) # this videos are protected by DRM, the f4m downloader doesn't support them continue - else: - video_url_hd = video_url.replace('free_es', 'es') - f4m_url = video_url_hd[:-9] + '/manifest.f4m' - 
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)) + video_url_hd = video_url.replace('free_es', 'es') + formats.extend(self._extract_f4m_formats( + video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds', + fatal=False)) + formats.extend(self._extract_mpd_formats( + video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash', + fatal=False)) + formats.extend(self._extract_ism_formats( + video_url_hd, video_id, ism_id='mss', fatal=False)) self._sort_formats(formats) path_data = player.get('pathData') From e8e4cc5a6a3ad8bf94d9ff9e5bb2d72712e14c34 Mon Sep 17 00:00:00 2001 From: John Hawkinson <jhawk@mit.edu> Date: Sun, 19 Mar 2017 20:52:25 -0400 Subject: [PATCH 0533/1696] [generic] Replace LazyYT test with skiplagged discourse.ubuntu.com has gone away, repalce with skiplagged.com. Be nice to have a non-frontpage URL that might be more stable, though I don't have one. Maybe this should move to html in test/test_InfoExtractor.py? --- youtube_dl/extractor/generic.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index da9d04efc..4fff93efe 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -902,12 +902,13 @@ class GenericIE(InfoExtractor): }, # LazyYT { - 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986', + 'url': 'https://skiplagged.com/', 'info_dict': { - 'id': '1986', - 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse', + 'id': 'skiplagged', + 'title': 'Skiplagged: The smart way to find cheap flights', }, - 'playlist_mincount': 2, + 'playlist_mincount': 1, + 'add_ie': ['Youtube'], }, # Cinchcast embed { From 7aa0ee321b4095da7a2430f383bea773115e1491 Mon Sep 17 00:00:00 2001 From: gkoelln <gkoelln7@gmail.com> Date: Sat, 25 Mar 2017 08:12:25 -0500 Subject: [PATCH 0534/1696] [fox] Add metadata extraction Add series, season number, episode number and episode. 
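Note on where those fields come from: the hunk below wires in self._search_json_ld(webpage, video_id, fatal=False), so the series/season/episode metadata is presumably read from the schema.org JSON-LD markup that fox.com episode pages embed. A rough, hypothetical sketch of that mapping follows — the markup values are invented and this is not the youtube-dl implementation, only the shape of TVEpisode data that _search_json_ld can consume:

    import json

    # Invented sample of schema.org TVEpisode markup, similar in shape to what
    # an episode page might embed in a <script type="application/ld+json"> tag.
    sample = json.loads('''{
        "@context": "http://schema.org",
        "@type": "TVEpisode",
        "name": "Pilot",
        "episodeNumber": 1,
        "partOfSeason": {"@type": "TVSeason", "seasonNumber": 1},
        "partOfSeries": {"@type": "TVSeries", "name": "Example Fox Show"}
    }''')

    # Approximate mapping into info-dict fields (sketch only):
    info = {
        'episode': sample.get('name'),
        'episode_number': sample.get('episodeNumber'),
        'season_number': (sample.get('partOfSeason') or {}).get('seasonNumber'),
        'series': (sample.get('partOfSeries') or {}).get('name'),
    }
    print(info)
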
--- youtube_dl/extractor/fox.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index 9f2e5d065..cc5d62ebc 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .adobepass import AdobePassIE from ..utils import ( + int_or_none, smuggle_url, update_url_query, ) @@ -47,9 +48,12 @@ class FOXIE(AdobePassIE): resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating) query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource) - return { + info = self._search_json_ld(webpage, video_id, fatal=False) + info.update({ '_type': 'url_transparent', 'ie_key': 'ThePlatform', 'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}), 'id': video_id, - } + }) + + return info From d97729c83a747f48d83f4aba9b85d2a14a58b8b7 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 25 Mar 2017 14:28:53 +0100 Subject: [PATCH 0535/1696] [fox] remove unused import --- youtube_dl/extractor/fox.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index cc5d62ebc..159fdf9c4 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals from .adobepass import AdobePassIE from ..utils import ( - int_or_none, smuggle_url, update_url_query, ) From c6c22e984d0d35172d8e39d2136d2059494d22b2 Mon Sep 17 00:00:00 2001 From: John Hawkinson <jhawk@mit.edu> Date: Sat, 25 Mar 2017 10:36:40 -0400 Subject: [PATCH 0536/1696] [test_download] Print additional IEs in summary output --- test/test_download.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/test_download.py b/test/test_download.py index 30034f978..01a8bcb89 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -71,6 +71,18 @@ class TestDownload(unittest.TestCase): maxDiff = None + def __str__(self): + """Identify each test with the `add_ie` attribute, if available.""" + + def strclass(cls): + """From 2.7's unittest; 2.6 had _strclass so we can't import it.""" + return '%s.%s' % (cls.__module__, cls.__name__) + + add_ie = getattr(self, self._testMethodName).add_ie + return '%s (%s)%s:' % (self._testMethodName, + strclass(self.__class__), + ' [%s]' % add_ie if add_ie else '') + def setUp(self): self.defs = defs @@ -233,6 +245,8 @@ for n, test_case in enumerate(defs): i += 1 test_method = generator(test_case, tname) test_method.__name__ = str(tname) + ie_list = test_case.get('add_ie') + test_method.add_ie = ie_list and ','.join(ie_list) setattr(TestDownload, test_method.__name__, test_method) del test_method From 610a6d10538d8ecab8e51dc083f02adbd09f706f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Mar 2017 21:40:28 +0700 Subject: [PATCH 0537/1696] [atresplayer] Do not extract ISM formats As per @remitamine: the ISM downloader does not support videos served from wowza servers(it will produce broken files) --- youtube_dl/extractor/atresplayer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index ffac9df0e..bfda1e24e 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -167,8 +167,6 @@ class AtresPlayerIE(InfoExtractor): formats.extend(self._extract_mpd_formats( video_url_hd[:-9] + '/manifest.mpd', video_id, 
mpd_id='dash', fatal=False)) - formats.extend(self._extract_ism_formats( - video_url_hd, video_id, ism_id='mss', fatal=False)) self._sort_formats(formats) path_data = player.get('pathData') From d66d43c5547daf4fc1a269824a8432477fbb099d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 25 Mar 2017 18:13:13 +0100 Subject: [PATCH 0538/1696] [atvat] Add new extractor(closes #5325) --- youtube_dl/extractor/atvat.py | 73 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 74 insertions(+) create mode 100644 youtube_dl/extractor/atvat.py diff --git a/youtube_dl/extractor/atvat.py b/youtube_dl/extractor/atvat.py new file mode 100644 index 000000000..1584d53fc --- /dev/null +++ b/youtube_dl/extractor/atvat.py @@ -0,0 +1,73 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + int_or_none, + unescapeHTML, +) + + +class ATVAtIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)' + _TESTS = [{ + 'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/', + 'md5': 'c3b6b975fb3150fc628572939df205f2', + 'info_dict': { + 'id': '1698447', + 'ext': 'mp4', + 'title': 'DI, 21.03.17 | 20:05 Uhr 1/1', + } + }, { + 'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_data = self._parse_json(unescapeHTML(self._search_regex( + r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"', + webpage, 'player data')), display_id)['config']['initial_video'] + + video_id = video_data['id'] + video_title = video_data['title'] + + parts = [] + for part in video_data.get('parts', []): + part_id = part['id'] + part_title = part['title'] + + formats = [] + for source in part.get('sources', []): + source_url = source.get('src') + if not source_url: + continue + ext = determine_ext(source_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + source_url, part_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'format_id': source.get('delivery'), + 'url': source_url, + }) + self._sort_formats(formats) + + parts.append({ + 'id': part_id, + 'title': part_title, + 'thumbnail': part.get('preview_image_url'), + 'duration': int_or_none(part.get('duration')), + 'is_live': part.get('is_livestream'), + 'formats': formats, + }) + + return { + '_type': 'multi_video', + 'id': video_id, + 'title': video_title, + 'entries': parts, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 40a5c9842..6a7028a4d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -71,6 +71,7 @@ from .arte import ( ) from .atresplayer import AtresPlayerIE from .atttechchannel import ATTTechChannelIE +from .atvat import ATVAtIE from .audimedia import AudiMediaIE from .audioboom import AudioBoomIE from .audiomack import AudiomackIE, AudiomackAlbumIE From 51ef4919dfd51b5bd562f39f865a117f9a5cd304 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 6 Mar 2017 00:31:44 +0800 Subject: [PATCH 0539/1696] [afreecatv] Fix extraction (closes #12179) --- ChangeLog | 6 +++ youtube_dl/extractor/afreecatv.py | 72 +++++++++++++------------------ 2 files changed, 36 insertions(+), 42 deletions(-) diff --git a/ChangeLog b/ChangeLog index 78377dcb4..45d6f244d 100644 --- a/ChangeLog 
+++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [afreecatv] Fix extraction (#12179) + + version 2017.03.24 Extractors diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index e0a0f7c57..b774d6db8 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -4,15 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlparse, - compat_urlparse, -) +from ..compat import compat_xpath from ..utils import ( ExtractorError, int_or_none, - update_url_query, - xpath_element, xpath_text, ) @@ -43,7 +38,8 @@ class AfreecaTVIE(InfoExtractor): 'uploader': 'dailyapril', 'uploader_id': 'dailyapril', 'upload_date': '20160503', - } + }, + 'skip': 'Video is gone', }, { 'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867', 'info_dict': { @@ -71,6 +67,19 @@ class AfreecaTVIE(InfoExtractor): 'upload_date': '20160502', }, }], + 'skip': 'Video is gone', + }, { + 'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793', + 'info_dict': { + 'id': '18650793', + 'ext': 'flv', + 'uploader': '윈아디', + 'uploader_id': 'badkids', + 'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!', + }, + 'params': { + 'skip_download': True, # requires rtmpdump + }, }, { 'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', 'only_matching': True, @@ -90,40 +99,33 @@ class AfreecaTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - parsed_url = compat_urllib_parse_urlparse(url) - info_url = compat_urlparse.urlunparse(parsed_url._replace( - netloc='afbbs.afreecatv.com:8080', - path='/api/video/get_video_info.php')) video_xml = self._download_xml( - update_url_query(info_url, {'nTitleNo': video_id}), video_id) + 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php', + video_id, query={'nTitleNo': video_id}) - if xpath_element(video_xml, './track/video/file') is None: + video_element = video_xml.findall(compat_xpath('./track/video'))[1] + if video_element is None or video_element.text is None: raise ExtractorError('Specified AfreecaTV video does not exist', expected=True) - title = xpath_text(video_xml, './track/title', 'title') + video_url_raw = video_element.text + + app, playpath = video_url_raw.split('mp4:') + + title = xpath_text(video_xml, './track/title', 'title', fatal=True) uploader = xpath_text(video_xml, './track/nickname', 'uploader') uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id') duration = int_or_none(xpath_text(video_xml, './track/duration', 'duration')) thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail') - entries = [] - for i, video_file in enumerate(video_xml.findall('./track/video/file')): - video_key = self.parse_video_key(video_file.get('key', '')) - if not video_key: - continue - entries.append({ - 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)), - 'title': title, - 'upload_date': video_key.get('upload_date'), - 'duration': int_or_none(video_file.get('duration')), - 'url': video_file.text, - }) - - info = { + return { 'id': video_id, + 'url': app, + 'ext': 'flv', + 'play_path': 'mp4:' + playpath, + 'rtmp_live': True, # downloading won't end without this 'title': title, 'uploader': uploader, 'uploader_id': uploader_id, @@ -131,20 +133,6 @@ class AfreecaTVIE(InfoExtractor): 'thumbnail': thumbnail, } - if len(entries) > 1: - 
info['_type'] = 'multi_video' - info['entries'] = entries - elif len(entries) == 1: - info['url'] = entries[0]['url'] - info['upload_date'] = entries[0].get('upload_date') - else: - raise ExtractorError( - 'No files found for the specified AfreecaTV video, either' - ' the URL is incorrect or the video has been made private.', - expected=True) - - return info - class AfreecaTVGlobalIE(AfreecaTVIE): IE_NAME = 'afreecatv:global' From 03486dbb0133e42074c272f60e24f18c856fdf0d Mon Sep 17 00:00:00 2001 From: Random User <rndusr@posteo.de> Date: Sat, 25 Mar 2017 19:37:45 +0100 Subject: [PATCH 0540/1696] Add test for JWPlayer where config is passed as variable --- youtube_dl/extractor/generic.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9868ca6d0..c8c103ae3 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -972,6 +972,20 @@ class GenericIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', }, }, + { + # JWPlayer config passed as variable + 'url': 'http://www.txxx.com/videos/3326530/ariele/', + 'info_dict': { + 'id': '3326530_hq', + 'ext': 'mp4', + 'title': 'ARIELE | Tube Cup', + 'uploader': 'www.txxx.com', + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + } + }, # rtl.nl embed { 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen', From fb4fc44928d042a33287fd3e8e18b721c29ff8e8 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 25 Mar 2017 19:37:54 +0100 Subject: [PATCH 0541/1696] [downloader/hls] immediately delegate downloading to ffmpeg in case live stream --- youtube_dl/downloader/hls.py | 21 +++++++++++++-------- youtube_dl/extractor/arkena.py | 3 +-- youtube_dl/extractor/ceskatelevize.py | 3 +-- youtube_dl/extractor/eyedotv.py | 2 +- youtube_dl/extractor/freshlive.py | 5 ++--- youtube_dl/extractor/livestream.py | 15 ++++++++------- youtube_dl/extractor/vk.py | 3 +-- 7 files changed, 27 insertions(+), 25 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 4989abce1..7534e4da5 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -30,6 +30,15 @@ class HlsFD(FragmentFD): FD_NAME = 'hlsnative' + def _delegate_to_ffmpeg(self, filename, info_dict): + self.report_warning( + 'hlsnative has detected features it does not support, ' + 'extraction will be delegated to ffmpeg') + fd = FFmpegFD(self.ydl, self.params) + for ph in self._progress_hooks: + fd.add_progress_hook(ph) + return fd.real_download(filename, info_dict) + @staticmethod def can_download(manifest, info_dict): UNSUPPORTED_FEATURES = ( @@ -53,10 +62,12 @@ class HlsFD(FragmentFD): ) check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest) - check_results.append(not info_dict.get('is_live')) return all(check_results) def real_download(self, filename, info_dict): + if info_dict.get('is_live'): + return self._delegate_to_ffmpeg(filename, info_dict) + man_url = info_dict['url'] self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) @@ -68,13 +79,7 @@ class HlsFD(FragmentFD): if info_dict.get('extra_param_to_segment_url'): self.report_error('pycrypto not found. 
Please install it.') return False - self.report_warning( - 'hlsnative has detected features it does not support, ' - 'extraction will be delegated to ffmpeg') - fd = FFmpegFD(self.ydl, self.params) - for ph in self._progress_hooks: - fd.add_progress_hook(ph) - return fd.real_download(filename, info_dict) + return self._delegate_to_ffmpeg(filename, info_dict) total_frags = 0 for line in s.splitlines(): diff --git a/youtube_dl/extractor/arkena.py b/youtube_dl/extractor/arkena.py index 50ffb442d..4495ddbb0 100644 --- a/youtube_dl/extractor/arkena.py +++ b/youtube_dl/extractor/arkena.py @@ -93,8 +93,7 @@ class ArkenaIE(InfoExtractor): exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None)) if kind == 'm3u8' or 'm3u8' in exts: formats.extend(self._extract_m3u8_formats( - f_url, video_id, 'mp4', - entry_protocol='m3u8' if is_live else 'm3u8_native', + f_url, video_id, 'mp4', 'm3u8_native', m3u8_id=kind, fatal=False, live=is_live)) elif kind == 'flash' or 'f4m' in exts: formats.extend(self._extract_f4m_formats( diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index b1dfacf80..dd2529a6d 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -160,8 +160,7 @@ class CeskaTelevizeIE(InfoExtractor): for format_id, stream_url in item.get('streamUrls', {}).items(): if 'playerType=flash' in stream_url: stream_formats = self._extract_m3u8_formats( - stream_url, playlist_id, 'mp4', - entry_protocol='m3u8' if is_live else 'm3u8_native', + stream_url, playlist_id, 'mp4', 'm3u8_native', m3u8_id='hls-%s' % format_id, fatal=False) else: stream_formats = self._extract_mpd_formats( diff --git a/youtube_dl/extractor/eyedotv.py b/youtube_dl/extractor/eyedotv.py index 2f3035147..f62ddebae 100644 --- a/youtube_dl/extractor/eyedotv.py +++ b/youtube_dl/extractor/eyedotv.py @@ -54,7 +54,7 @@ class EyedoTVIE(InfoExtractor): 'id': video_id, 'title': title, 'formats': self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'), + m3u8_url, video_id, 'mp4', 'm3u8_native'), 'description': xpath_text(video_data, _add_ns('Description')), 'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))), 'uploader': xpath_text(video_data, _add_ns('Createur')), diff --git a/youtube_dl/extractor/freshlive.py b/youtube_dl/extractor/freshlive.py index a90f9156c..72a845945 100644 --- a/youtube_dl/extractor/freshlive.py +++ b/youtube_dl/extractor/freshlive.py @@ -56,9 +56,8 @@ class FreshLiveIE(InfoExtractor): is_live = info.get('liveStreamUrl') is not None formats = self._extract_m3u8_formats( - stream_url, video_id, ext='mp4', - entry_protocol='m3u8' if is_live else 'm3u8_native', - m3u8_id='hls') + stream_url, video_id, 'mp4', + 'm3u8_native', m3u8_id='hls') if is_live: title = self._live_title(title) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index c863413bf..7f946c6ed 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -119,7 +119,8 @@ class LivestreamIE(InfoExtractor): m3u8_url = video_data.get('m3u8_url') if m3u8_url: formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) f4m_url = video_data.get('f4m_url') if f4m_url: @@ -158,11 +159,11 @@ class LivestreamIE(InfoExtractor): if smil_url: formats.extend(self._extract_smil_formats(smil_url, broadcast_id)) - entry_protocol = 'm3u8' if 
is_live else 'm3u8_native' m3u8_url = stream_info.get('m3u8_url') if m3u8_url: formats.extend(self._extract_m3u8_formats( - m3u8_url, broadcast_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False)) + m3u8_url, broadcast_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) rtsp_url = stream_info.get('rtsp_url') if rtsp_url: @@ -276,7 +277,7 @@ class LivestreamOriginalIE(InfoExtractor): 'view_count': view_count, } - def _extract_video_formats(self, video_data, video_id, entry_protocol): + def _extract_video_formats(self, video_data, video_id): formats = [] progressive_url = video_data.get('progressiveUrl') @@ -289,7 +290,8 @@ class LivestreamOriginalIE(InfoExtractor): m3u8_url = video_data.get('httpUrl') if m3u8_url: formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False)) + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) rtsp_url = video_data.get('rtspUrl') if rtsp_url: @@ -340,11 +342,10 @@ class LivestreamOriginalIE(InfoExtractor): } video_data = self._download_json(stream_url, content_id) is_live = video_data.get('isLive') - entry_protocol = 'm3u8' if is_live else 'm3u8_native' info.update({ 'id': content_id, 'title': self._live_title(info['title']) if is_live else info['title'], - 'formats': self._extract_video_formats(video_data, content_id, entry_protocol), + 'formats': self._extract_video_formats(video_data, content_id), 'is_live': is_live, }) return info diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 7c42a4f54..dc2719cf9 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -432,8 +432,7 @@ class VKIE(VKBaseIE): }) elif format_id == 'hls': formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', - entry_protocol='m3u8' if is_live else 'm3u8_native', + format_url, video_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False, live=is_live)) elif format_id == 'rtmp': formats.append({ From c73e330e7adc9c0c15ac51aeea8fbb7dad95351a Mon Sep 17 00:00:00 2001 From: Random User <rndusr@posteo.de> Date: Sat, 25 Mar 2017 19:38:30 +0100 Subject: [PATCH 0542/1696] _find_jwplayer_data() returns dict or None This simplifies code for callers of `_find_jwplayer_data()` which no longer have to run `_parse_json()` on the return value. It also makes sure that `_find_jwplayer_data()` returns either a `dict` or `None` and nothing else. 
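The caller-side simplification described above is what the generic.py hunk further down implements. Schematically — this is a fragment of an extractor's _real_extract taken from the old and new code in this patch, with webpage and video_id already in scope, not a standalone program:

    # before: _find_jwplayer_data() returned the raw setup() options string,
    # so each caller had to JSON-parse it and swallow parse failures itself
    jwplayer_data_str = self._find_jwplayer_data(webpage)
    if jwplayer_data_str:
        try:
            jwplayer_data = self._parse_json(
                jwplayer_data_str, video_id, transform_source=js_to_json)
            return self._parse_jwplayer_data(jwplayer_data, video_id)
        except ExtractorError:
            pass

    # after: parsing happens inside _find_jwplayer_data(), which now returns
    # either a dict or None, so a plain truthiness check is enough
    jwplayer_data = self._find_jwplayer_data(
        webpage, video_id, transform_source=js_to_json)
    if jwplayer_data:
        return self._parse_jwplayer_data(jwplayer_data, video_id)
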
--- youtube_dl/extractor/common.py | 18 ++++++++++++------ youtube_dl/extractor/generic.py | 12 ++++-------- youtube_dl/extractor/tvnoe.py | 5 ++--- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index eb3c091aa..c2ca73ee1 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2161,18 +2161,24 @@ class InfoExtractor(object): }) return formats - @staticmethod - def _find_jwplayer_data(webpage): + def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json): mobj = re.search( r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)', webpage) if mobj: - return mobj.group('options') + try: + jwplayer_data = self._parse_json(mobj.group('options'), + video_id=video_id, + transform_source=transform_source) + except ExtractorError: + pass + else: + if isinstance(jwplayer_data, dict): + return jwplayer_data def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): - jwplayer_data = self._parse_json( - self._find_jwplayer_data(webpage), video_id, - transform_source=js_to_json) + jwplayer_data = self._find_jwplayer_data( + webpage, video_id, transform_source=js_to_json) return self._parse_jwplayer_data( jwplayer_data, video_id, *args, **kwargs) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c8c103ae3..3fe0237b6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2518,14 +2518,10 @@ class GenericIE(InfoExtractor): self._sort_formats(entry['formats']) return self.playlist_result(entries) - jwplayer_data_str = self._find_jwplayer_data(webpage) - if jwplayer_data_str: - try: - jwplayer_data = self._parse_json( - jwplayer_data_str, video_id, transform_source=js_to_json) - return self._parse_jwplayer_data(jwplayer_data, video_id) - except ExtractorError: - pass + jwplayer_data = self._find_jwplayer_data( + webpage, video_id, transform_source=js_to_json) + if jwplayer_data: + return self._parse_jwplayer_data(jwplayer_data, video_id) def check_video(vurl): if YoutubeIE.suitable(vurl): diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py index 1a5b76bf2..26a5aeae4 100644 --- a/youtube_dl/extractor/tvnoe.py +++ b/youtube_dl/extractor/tvnoe.py @@ -31,9 +31,8 @@ class TVNoeIE(InfoExtractor): r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe URL') ifs_page = self._download_webpage(iframe_url, video_id) - jwplayer_data = self._parse_json( - self._find_jwplayer_data(ifs_page), - video_id, transform_source=js_to_json) + jwplayer_data = self._find_jwplayer_data( + ifs_page, video_id, transform_source=js_to_json) info_dict = self._parse_jwplayer_data( jwplayer_data, video_id, require_title=False, base_url=iframe_url) From 51098426b83a8ebce4b0c08e869ce023232089fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Mar 2017 02:30:10 +0700 Subject: [PATCH 0543/1696] [utils] Introduce expand_path --- test/test_utils.py | 10 ++++++++++ youtube_dl/utils.py | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 173c49514..8c50b46e8 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -56,6 +56,7 @@ from youtube_dl.utils import ( read_batch_urls, sanitize_filename, sanitize_path, + expand_path, prepend_extension, replace_extension, remove_start, @@ -95,6 +96,8 @@ from youtube_dl.utils import ( from youtube_dl.compat import ( compat_chr, compat_etree_fromstring, + 
compat_getenv, + compat_setenv, compat_urlparse, compat_parse_qs, ) @@ -214,6 +217,13 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_path('./abc'), 'abc') self.assertEqual(sanitize_path('./../abc'), '..\\abc') + def test_expand_path(self): + compat_setenv('YOUTUBE-DL-EXPATH-PATH', 'expanded') + self.assertEqual(expand_path('%YOUTUBE-DL-EXPATH-PATH%'), 'expanded') + self.assertEqual(expand_path('%HOMEPATH%'), compat_getenv('HOMEPATH')) + self.assertEqual(expand_path('~'), compat_getenv('HOME')) + self.assertEqual(expand_path('~/%YOUTUBE-DL-EXPATH-PATH%'), '%s/expanded' % compat_getenv('HOME')) + def test_prepend_extension(self): self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext') self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d293c7498..2340bc306 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -39,6 +39,7 @@ from .compat import ( compat_basestring, compat_chr, compat_etree_fromstring, + compat_expanduser, compat_html_entities, compat_html_entities_html5, compat_http_client, @@ -539,6 +540,11 @@ def sanitized_Request(url, *args, **kwargs): return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs) +def expand_path(s): + """Expand shell variables and ~""" + return os.path.expandvars(compat_expanduser(s)) + + def orderedSet(iterable): """ Remove all duplicates from the input iterable """ res = [] From 590bc6f6a1cb513852a22f6db0ee36e9bd138f64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Mar 2017 02:31:16 +0700 Subject: [PATCH 0544/1696] Use expand_path where appropriate (closes #12556) --- youtube_dl/YoutubeDL.py | 6 +++--- youtube_dl/__init__.py | 8 ++++---- youtube_dl/cache.py | 9 ++++++--- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index cb502c26f..21586f0f4 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -29,7 +29,6 @@ import random from .compat import ( compat_basestring, compat_cookiejar, - compat_expanduser, compat_get_terminal_size, compat_http_client, compat_kwargs, @@ -54,6 +53,7 @@ from .utils import ( encode_compat_str, encodeFilename, error_to_compat_str, + expand_path, ExtractorError, format_bytes, formatSeconds, @@ -672,7 +672,7 @@ class YoutubeDL(object): FORMAT_RE.format(numeric_field), r'%({0})s'.format(numeric_field), outtmpl) - tmpl = compat_expanduser(outtmpl) + tmpl = expand_path(outtmpl) filename = tmpl % template_dict # Temporary fix for #4787 # 'Treat' all problem characters by passing filename through preferredencoding @@ -2170,7 +2170,7 @@ class YoutubeDL(object): if opts_cookiefile is None: self.cookiejar = compat_cookiejar.CookieJar() else: - opts_cookiefile = compat_expanduser(opts_cookiefile) + opts_cookiefile = expand_path(opts_cookiefile) self.cookiejar = compat_cookiejar.MozillaCookieJar( opts_cookiefile) if os.access(opts_cookiefile, os.R_OK): diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 2f640607f..f15606568 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -16,7 +16,6 @@ from .options import ( parseOpts, ) from .compat import ( - compat_expanduser, compat_getpass, compat_shlex_split, workaround_optparse_bug9161, @@ -26,6 +25,7 @@ from .utils import ( decodeOption, DEFAULT_OUTTMPL, DownloadError, + expand_path, match_filter_func, MaxDownloadsReached, preferredencoding, @@ -88,7 +88,7 @@ def _real_main(argv=None): batchfd = 
sys.stdin else: batchfd = io.open( - compat_expanduser(opts.batchfile), + expand_path(opts.batchfile), 'r', encoding='utf-8', errors='ignore') batch_urls = read_batch_urls(batchfd) if opts.verbose: @@ -238,7 +238,7 @@ def _real_main(argv=None): any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json any_printing = opts.print_json - download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive + download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive # PostProcessors postprocessors = [] @@ -449,7 +449,7 @@ def _real_main(argv=None): try: if opts.load_info_filename is not None: - retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename)) + retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename)) else: retcode = ydl.download(all_urls) except MaxDownloadsReached: diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py index 5fe839eb1..7bdade1bd 100644 --- a/youtube_dl/cache.py +++ b/youtube_dl/cache.py @@ -8,8 +8,11 @@ import re import shutil import traceback -from .compat import compat_expanduser, compat_getenv -from .utils import write_json_file +from .compat import compat_getenv +from .utils import ( + expand_path, + write_json_file, +) class Cache(object): @@ -21,7 +24,7 @@ class Cache(object): if res is None: cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache') res = os.path.join(cache_root, 'youtube-dl') - return compat_expanduser(res) + return expand_path(res) def _get_cache_fn(self, section, key, dtype): assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \ From 5b7cc56b05ff4e3936da7a7c0bec5f8d5c9f27c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Mar 2017 02:32:14 +0700 Subject: [PATCH 0545/1696] [atresplayer] PEP 8 --- youtube_dl/extractor/atresplayer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index bfda1e24e..99af6dc5a 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -156,8 +156,8 @@ class AtresPlayerIE(InfoExtractor): if format_id == 'token' or not video_url.startswith('http'): continue if 'geodeswowsmpra3player' in video_url: - f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0] - f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path) + # f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0] + # f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path) # this videos are protected by DRM, the f4m downloader doesn't support them continue video_url_hd = video_url.replace('free_es', 'es') From 15495cf3e5f8dbab5559936006df77e3ac0a370b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Mar 2017 02:32:46 +0700 Subject: [PATCH 0546/1696] [franceculture] PEP 8 --- youtube_dl/extractor/franceculture.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py index df3d757f3..b8fa17588 100644 --- a/youtube_dl/extractor/franceculture.py +++ b/youtube_dl/extractor/franceculture.py @@ -6,7 +6,6 @@ from ..utils import ( determine_ext, extract_attributes, int_or_none, - unified_strdate, ) From 
d212c93d16bbb6aeb8645f1dbb15a78f3d9414a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Mar 2017 02:34:25 +0700 Subject: [PATCH 0547/1696] [pluralsight] PEP 8 --- youtube_dl/extractor/pluralsight.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 0c6e036ca..e45d9fe55 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -169,11 +169,10 @@ class PluralsightIE(PluralsightBaseIE): collection = course['modules'] - module, clip = None, None + clip = None for module_ in collection: if name in (module_.get('moduleName'), module_.get('name')): - module = module_ for clip_ in module_.get('clips', []): clip_index = clip_.get('clipIndex') if clip_index is None: From 41c5e60dd57c0df10f4aa05dee95af2bbc1dc8dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Mar 2017 03:07:56 +0700 Subject: [PATCH 0548/1696] [test_utils] Fix expand_path tests --- test/test_utils.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 8c50b46e8..b9a02666d 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -218,11 +218,16 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_path('./../abc'), '..\\abc') def test_expand_path(self): + def env(var): + return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var) + compat_setenv('YOUTUBE-DL-EXPATH-PATH', 'expanded') - self.assertEqual(expand_path('%YOUTUBE-DL-EXPATH-PATH%'), 'expanded') - self.assertEqual(expand_path('%HOMEPATH%'), compat_getenv('HOMEPATH')) + self.assertEqual(expand_path(env('YOUTUBE-DL-EXPATH-PATH')), 'expanded') + self.assertEqual(expand_path(env('HOMEPATH')), compat_getenv('HOMEPATH')) self.assertEqual(expand_path('~'), compat_getenv('HOME')) - self.assertEqual(expand_path('~/%YOUTUBE-DL-EXPATH-PATH%'), '%s/expanded' % compat_getenv('HOME')) + self.assertEqual( + expand_path('~/%s' % env('YOUTUBE-DL-EXPATH-PATH')), + '%s/expanded' % compat_getenv('HOME')) def test_prepend_extension(self): self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext') From a426ef6d783038e570db252a2e9e72800ffcb381 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Mar 2017 03:22:48 +0700 Subject: [PATCH 0549/1696] [test_utils] Do not use dash in env variables' names --- test/test_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index b9a02666d..aa4569b81 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -221,12 +221,12 @@ class TestUtil(unittest.TestCase): def env(var): return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var) - compat_setenv('YOUTUBE-DL-EXPATH-PATH', 'expanded') - self.assertEqual(expand_path(env('YOUTUBE-DL-EXPATH-PATH')), 'expanded') - self.assertEqual(expand_path(env('HOMEPATH')), compat_getenv('HOMEPATH')) + compat_setenv('YOUTUBE_DL_EXPATH_PATH', 'expanded') + self.assertEqual(expand_path(env('YOUTUBE_DL_EXPATH_PATH')), 'expanded') + self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME')) self.assertEqual(expand_path('~'), compat_getenv('HOME')) self.assertEqual( - expand_path('~/%s' % env('YOUTUBE-DL-EXPATH-PATH')), + expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')), '%s/expanded' % compat_getenv('HOME')) def test_prepend_extension(self): From 
942b44a0525f677924c660bcb00902d705d91fc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Mar 2017 03:24:25 +0700 Subject: [PATCH 0550/1696] [test_compat] Do not use dash in env variables' names --- test/test_compat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_compat.py b/test/test_compat.py index b57424948..d6c54e135 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -27,11 +27,11 @@ from youtube_dl.compat import ( class TestCompat(unittest.TestCase): def test_compat_getenv(self): test_str = 'тест' - compat_setenv('YOUTUBE-DL-TEST', test_str) - self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str) + compat_setenv('YOUTUBE_DL_COMPAT_GETENV', test_str) + self.assertEqual(compat_getenv('YOUTUBE_DL_COMPAT_GETENV'), test_str) def test_compat_setenv(self): - test_var = 'YOUTUBE-DL-TEST' + test_var = 'YOUTUBE_DL_COMPAT_SETENV' test_str = 'тест' compat_setenv(test_var, test_str) compat_getenv(test_var) From 2bfaf89b6cc6dd07ed6ca32086c72a98b67c20ba Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 25 Mar 2017 23:06:33 +0100 Subject: [PATCH 0551/1696] [downloader/hls] move check for m3u8 live streams to get_suitable_downloader --- youtube_dl/downloader/__init__.py | 3 +++ youtube_dl/downloader/hls.py | 21 ++++++++------------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 16952e359..2e485df9d 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -43,6 +43,9 @@ def get_suitable_downloader(info_dict, params={}): if ed.can_download(info_dict): return ed + if protocol.startswith('m3u8') and info_dict.get('is_live'): + return FFmpegFD + if protocol == 'm3u8' and params.get('hls_prefer_native') is True: return HlsFD diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 7534e4da5..4989abce1 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -30,15 +30,6 @@ class HlsFD(FragmentFD): FD_NAME = 'hlsnative' - def _delegate_to_ffmpeg(self, filename, info_dict): - self.report_warning( - 'hlsnative has detected features it does not support, ' - 'extraction will be delegated to ffmpeg') - fd = FFmpegFD(self.ydl, self.params) - for ph in self._progress_hooks: - fd.add_progress_hook(ph) - return fd.real_download(filename, info_dict) - @staticmethod def can_download(manifest, info_dict): UNSUPPORTED_FEATURES = ( @@ -62,12 +53,10 @@ class HlsFD(FragmentFD): ) check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest) + check_results.append(not info_dict.get('is_live')) return all(check_results) def real_download(self, filename, info_dict): - if info_dict.get('is_live'): - return self._delegate_to_ffmpeg(filename, info_dict) - man_url = info_dict['url'] self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) @@ -79,7 +68,13 @@ class HlsFD(FragmentFD): if info_dict.get('extra_param_to_segment_url'): self.report_error('pycrypto not found. 
Please install it.') return False - return self._delegate_to_ffmpeg(filename, info_dict) + self.report_warning( + 'hlsnative has detected features it does not support, ' + 'extraction will be delegated to ffmpeg') + fd = FFmpegFD(self.ydl, self.params) + for ph in self._progress_hooks: + fd.add_progress_hook(ph) + return fd.real_download(filename, info_dict) total_frags = 0 for line in s.splitlines(): From f7923a4c399e0ce8e6cd230db92aefbfcff297c3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 26 Mar 2017 22:07:12 +0800 Subject: [PATCH 0552/1696] [ChangeLog] Update after #12307 --- ChangeLog | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ChangeLog b/ChangeLog index 45d6f244d..adc64053b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ version <unreleased> +Core +* Don't raise an error if JWPlayer config data is not a Javascript object + literal. _find_jwplayer_data() now returns a dict rather than an str. + (#12307) + Extractors * [afreecatv] Fix extraction (#12179) From 82eefd0be00b7557782ae75602b463e226dd964f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Mar 2017 23:39:12 +0700 Subject: [PATCH 0553/1696] [ChangeLog] Actualize --- ChangeLog | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index adc64053b..e79067cff 100644 --- a/ChangeLog +++ b/ChangeLog @@ -2,11 +2,21 @@ version <unreleased> Core * Don't raise an error if JWPlayer config data is not a Javascript object - literal. _find_jwplayer_data() now returns a dict rather than an str. - (#12307) + literal. _find_jwplayer_data now returns a dict rather than an str. (#12307) +* Expand environment variables for options representing paths (#12556) ++ [utils] Introduce expand_path +* [downloader/hls] Delegate downloading to ffmpeg immediately for live streams Extractors * [afreecatv] Fix extraction (#12179) ++ [atvat] Add support for atv.at (#5325) ++ [fox] Add metadata extraction (#12391) ++ [atresplayer] Extract DASH formats ++ [atresplayer] Extract HD manifest (#12548) +* [atresplayer] Fix login error detection (#12548) +* [franceculture] Fix extraction (#12547) +* [youtube] Improve URL regular expression (#12538) +* [generic] Do not follow redirects to the same URL version 2017.03.24 From 9e691da06791a0a617ed69ef21e272536e247ed1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Mar 2017 08:11:40 +0700 Subject: [PATCH 0554/1696] release 2017.03.26 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index dfff41d2d..2f717926c 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.24** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.26*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. 
Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.26** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.03.24 +[debug] youtube-dl version 2017.03.26 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e79067cff..07725b12a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.03.26 Core * Don't raise an error if JWPlayer config data is not a Javascript object diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7c99ba3c2..e9dbc021b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -67,6 +67,7 @@ - **arte.tv:playlist** - **AtresPlayer** - **ATTTechChannel** + - **ATVAt** - **AudiMedia** - **AudioBoom** - **audiomack** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 13904c724..94e8198ec 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.03.24' +__version__ = '2017.03.26' From aea1dccbd07b073ef36b325a9a21eb3f642322d9 Mon Sep 17 00:00:00 2001 From: Tithen-Firion <tithen.firion.0@gmail.com> Date: Tue, 28 Mar 2017 15:42:03 +0200 Subject: [PATCH 0555/1696] [openload] fix extractor --- youtube_dl/extractor/openload.py | 73 +++++++++++++------------------- 1 file changed, 30 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 58ffde541..d8036b54a 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -75,51 +75,38 @@ class OpenloadIE(InfoExtractor): '<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>', webpage, 'openload ID') - video_url_chars = [] - - first_char = ord(ol_id[0]) - key = first_char - 55 - maxKey = max(2, key) - key = min(maxKey, len(ol_id) - 38) - t = ol_id[key:key + 36] - - hashMap = {} - v = ol_id.replace(t, '') - h = 0 - - while h < len(t): - f = t[h:h + 3] - i = int(f, 8) - hashMap[h / 3] = i - h += 3 - - h = 0 - H = 0 - while h < len(v): - B = '' - C = '' - if len(v) >= h + 2: - B = v[h:h + 2] - if len(v) >= h + 3: - C = v[h:h + 3] - i = int(B, 16) - h += 2 - if H % 3 == 0: - i = int(C, 8) - h += 1 - elif H % 2 == 0 and H != 0 and ord(v[H - 1]) < 60: - i = int(C, 10) - h += 1 - index = H % 7 - - A = hashMap[index] - i ^= 213 - i ^= A - video_url_chars.append(compat_chr(i)) - H += 1 + decoded = '' + a = ol_id[0:24] + b = [] + for i in range(0, len(a), 8): + b.append(int(a[i:i + 8] or '0', 16)) + ol_id = ol_id[24:] + j = 0 + k = 0 + while j < len(ol_id): + c = 128 + d = 0 + e = 0 + f = 0 + _more = True + while _more: + if j + 1 >= len(ol_id): + c = 143 + f = int(ol_id[j:j + 2] or '0', 16) + j += 2 + d += (f & 127) << e + e += 7 + _more = f >= c + g = d ^ b[k % 3] + for i in range(4): + char_dec = (g >> 8 * i) & (c + 127) + char = compat_chr(char_dec) + if char != '#': + decoded += char + k += 1 video_url 
= 'https://openload.co/stream/%s?mime=true' - video_url = video_url % (''.join(video_url_chars)) + video_url = video_url % decoded title = self._og_search_title(webpage, default=None) or self._search_regex( r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage, From 12ee65ea0d09c6ac42ad06b3d561b4a26db00cfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 28 Mar 2017 23:35:48 +0700 Subject: [PATCH 0556/1696] [options] Mention ISM for --fragment-retries and --skip-unavailable-fragments --- youtube_dl/options.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 6b811535f..2d2f5e47b 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -459,11 +459,11 @@ def parseOpts(overrideArguments=None): downloader.add_option( '--fragment-retries', dest='fragment_retries', metavar='RETRIES', default=10, - help='Number of retries for a fragment (default is %default), or "infinite" (DASH and hlsnative only)') + help='Number of retries for a fragment (default is %default), or "infinite" (DASH, hlsnative and ISM)') downloader.add_option( '--skip-unavailable-fragments', action='store_true', dest='skip_unavailable_fragments', default=True, - help='Skip unavailable fragments (DASH and hlsnative only)') + help='Skip unavailable fragments (DASH, hlsnative and ISM)') downloader.add_option( '--abort-on-unavailable-fragment', action='store_false', dest='skip_unavailable_fragments', From 128244657b92582f7f4793c2d1be86b04032ac7f Mon Sep 17 00:00:00 2001 From: plroman <dev@plr.pm> Date: Tue, 28 Mar 2017 23:23:20 +0200 Subject: [PATCH 0557/1696] [allocine] Fix extraction --- youtube_dl/extractor/allocine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 90f11d39f..0463a070b 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -70,7 +70,7 @@ class AllocineIE(InfoExtractor): if model: model_data = self._parse_json(model, display_id) - for video_url in model_data['sources'].values(): + for video_url in model_data['videos'][0]['sources'].values(): video_id, format_id = url_basename(video_url).split('_')[:2] formats.append({ 'format_id': format_id, @@ -78,7 +78,7 @@ class AllocineIE(InfoExtractor): 'url': video_url, }) - title = model_data['title'] + title = model_data['videos'][0]['title'] else: video_id = display_id media_data = self._download_json( From 639e5b2a848c0a73e8525472dd8bb4b14a8c4746 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 29 Mar 2017 04:43:12 +0700 Subject: [PATCH 0558/1696] [allocine] Extract more metadata --- youtube_dl/extractor/allocine.py | 44 ++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 0463a070b..cd533acfc 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -2,9 +2,13 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( - remove_end, + int_or_none, qualities, + remove_end, + try_get, + unified_timestamp, url_basename, ) @@ -22,6 +26,10 @@ class AllocineIE(InfoExtractor): 'title': 'Astérix - Le Domaine des Dieux Teaser VF', 'description': 'md5:4a754271d9c6f16c72629a8a993ee884', 'thumbnail': r're:http://.*\.jpg', + 'duration': 39, + 'timestamp': 1404273600, + 
'upload_date': '20140702', + 'view_count': int, }, }, { 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html', @@ -33,6 +41,10 @@ class AllocineIE(InfoExtractor): 'title': 'Planes 2 Bande-annonce VF', 'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway', 'thumbnail': r're:http://.*\.jpg', + 'duration': 69, + 'timestamp': 1385659800, + 'upload_date': '20131128', + 'view_count': int, }, }, { 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html', @@ -44,6 +56,10 @@ class AllocineIE(InfoExtractor): 'title': 'Dragons 2 - Bande annonce finale VF', 'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a', 'thumbnail': r're:http://.*\.jpg', + 'duration': 144, + 'timestamp': 1397589900, + 'upload_date': '20140415', + 'view_count': int, }, }, { 'url': 'http://www.allocine.fr/video/video-19550147/', @@ -69,34 +85,37 @@ class AllocineIE(InfoExtractor): r'data-model="([^"]+)"', webpage, 'data model', default=None) if model: model_data = self._parse_json(model, display_id) - - for video_url in model_data['videos'][0]['sources'].values(): + video = model_data['videos'][0] + title = video['title'] + for video_url in video['sources'].values(): video_id, format_id = url_basename(video_url).split('_')[:2] formats.append({ 'format_id': format_id, 'quality': quality(format_id), 'url': video_url, }) - - title = model_data['videos'][0]['title'] + duration = int_or_none(video.get('duration')) + view_count = int_or_none(video.get('view_count')) + timestamp = unified_timestamp(try_get( + video, lambda x: x['added_at']['date'], compat_str)) else: video_id = display_id media_data = self._download_json( 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id) + title = remove_end( + self._html_search_regex( + r'(?s)<title>(.+?)', webpage, 'title').strip(), + ' - AlloCiné') for key, value in media_data['video'].items(): if not key.endswith('Path'): continue - format_id = key[:-len('Path')] formats.append({ 'format_id': format_id, 'quality': quality(format_id), 'url': value, }) - - title = remove_end(self._html_search_regex( - r'(?s)(.+?)', webpage, 'title' - ).strip(), ' - AlloCiné') + duration, view_count, timestamp = [None] * 3 self._sort_formats(formats) @@ -104,7 +123,10 @@ class AllocineIE(InfoExtractor): 'id': video_id, 'display_id': display_id, 'title': title, + 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), + 'duration': duration, + 'timestamp': timestamp, + 'view_count': view_count, 'formats': formats, - 'description': self._og_search_description(webpage), } From 82be732b174ea8e9984e7b0582c69e41b266d1da Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 31 Mar 2017 12:24:23 +0100 Subject: [PATCH 0559/1696] [adn] Add new extractor --- youtube_dl/extractor/adn.py | 136 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 137 insertions(+) create mode 100644 youtube_dl/extractor/adn.py diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py new file mode 100644 index 000000000..e44caa00b --- /dev/null +++ b/youtube_dl/extractor/adn.py @@ -0,0 +1,136 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 +import json +import os + +from .common import InfoExtractor +from ..aes import aes_cbc_decrypt +from ..compat import compat_ord +from ..utils import ( + bytes_to_intlist, + ExtractorError, + float_or_none, + intlist_to_bytes, + 
srt_subtitles_timecode, + strip_or_none, +) + + +class ADNIE(InfoExtractor): + IE_DESC = 'Anime Digital Network' + _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P\d+)' + _TEST = { + 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', + 'md5': 'e497370d847fd79d9d4c74be55575c7a', + 'info_dict': { + 'id': '7778', + 'ext': 'mp4', + 'title': 'Blue Exorcist - Kyôto Saga - Épisode 1', + 'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5', + } + } + + def _get_subtitles(self, sub_path, video_id): + if not sub_path: + return None + + enc_subtitles = self._download_webpage( + 'http://animedigitalnetwork.fr/' + sub_path, + video_id, fatal=False) + if not enc_subtitles: + return None + + # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js + dec_subtitles = intlist_to_bytes(aes_cbc_decrypt( + bytes_to_intlist(base64.b64decode(enc_subtitles[24:])), + bytes_to_intlist(b'\xb5@\xcfq\xa3\x98"N\xe4\xf3\x12\x98}}\x16\xd8'), + bytes_to_intlist(base64.b64decode(enc_subtitles[:24])) + )) + subtitles_json = self._parse_json( + dec_subtitles[:-compat_ord(dec_subtitles[-1])], + None, fatal=False) + if not subtitles_json: + return None + + subtitles = {} + for sub_lang, sub in subtitles_json.items(): + srt = '' + for num, current in enumerate(sub): + start, end, text = ( + float_or_none(current.get('startTime')), + float_or_none(current.get('endTime')), + current.get('text')) + if start is None or end is None or text is None: + continue + srt += os.linesep.join( + ( + '%d' % num, + '%s --> %s' % ( + srt_subtitles_timecode(start), + srt_subtitles_timecode(end)), + text, + os.linesep, + )) + + if sub_lang == 'vostf': + sub_lang = 'fr' + subtitles.setdefault(sub_lang, []).extend([{ + 'ext': 'json', + 'data': json.dumps(sub), + }, { + 'ext': 'srt', + 'data': srt, + }]) + return subtitles + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + player_config = self._parse_json(self._search_regex( + r'playerConfig\s*=\s*({.+});', webpage, 'player config'), video_id) + + video_info = {} + video_info_str = self._search_regex( + r'videoInfo\s*=\s*({.+});', webpage, + 'video info', fatal=False) + if video_info_str: + video_info = self._parse_json( + video_info_str, video_id, fatal=False) or {} + + options = player_config.get('options') or {} + metas = options.get('metas') or {} + title = metas.get('title') or video_info['title'] + links = player_config.get('links') or {} + + formats = [] + for format_id, qualities in links.items(): + for load_balancer_url in qualities.values(): + load_balancer_data = self._download_json( + load_balancer_url, video_id, fatal=False) or {} + m3u8_url = load_balancer_data.get('location') + if not m3u8_url: + continue + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id=format_id, fatal=False) + if format_id == 'vf': + for f in m3u8_formats: + f['language'] = 'fr' + formats.extend(m3u8_formats) + error = options.get('error') + if not formats and error: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': strip_or_none(metas.get('summary') or video_info.get('resume')), + 'thumbnail': video_info.get('image'), + 'formats': formats, + 'subtitles': self.extract_subtitles(player_config.get('subtitles'), video_id), + 'episode': metas.get('subtitle') or 
video_info.get('videoTitle'), + 'series': video_info.get('playlistTitle'), + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6a7028a4d..43933ad5b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -19,6 +19,7 @@ from .acast import ( ACastChannelIE, ) from .addanime import AddAnimeIE +from .adn import ADNIE from .adobetv import ( AdobeTVIE, AdobeTVShowIE, From 3e943cfe09eda6ef9b0fa419fdd22155fbaa047f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 31 Mar 2017 14:54:06 +0100 Subject: [PATCH 0560/1696] [generic] pass base_url to _parse_jwplayer_data --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 274f81738..73911940c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2568,7 +2568,7 @@ class GenericIE(InfoExtractor): webpage, video_id, transform_source=js_to_json) if jwplayer_data: info = self._parse_jwplayer_data( - jwplayer_data, video_id, require_title=False) + jwplayer_data, video_id, require_title=False, base_url=url) if not info.get('title'): info['title'] = video_title return info From 1640eb096166c81918125a0a7462eb2edb063167 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 31 Mar 2017 23:57:35 +0700 Subject: [PATCH 0561/1696] [YoutubeDL] Return early when extraction of url_transparent fails --- youtube_dl/YoutubeDL.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 21586f0f4..54bc8b06d 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -837,6 +837,12 @@ class YoutubeDL(object): ie_result['url'], ie_key=ie_result.get('ie_key'), extra_info=extra_info, download=False, process=False) + # extract_info may return None when ignoreerrors is enabled and + # extraction failed with an error, don't crash and return early + # in this case + if not info: + return info + force_properties = dict( (k, v) for k, v in ie_result.items() if v is not None) for f in ('_type', 'url', 'ie_key'): From 7453999580f2809153a84420d3ca72b24186c02b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 1 Apr 2017 00:25:27 +0700 Subject: [PATCH 0562/1696] [packtpub] Add extractor (closes #12610) --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/packtpub.py | 138 +++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 youtube_dl/extractor/packtpub.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 43933ad5b..6ad7444fe 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -729,6 +729,10 @@ from .orf import ( ORFFM4IE, ORFIPTVIE, ) +from .packtpub import ( + PacktPubIE, + PacktPubCourseIE, +) from .pandatv import PandaTVIE from .pandoratv import PandoraTVIE from .parliamentliveuk import ParliamentLiveUKIE diff --git a/youtube_dl/extractor/packtpub.py b/youtube_dl/extractor/packtpub.py new file mode 100644 index 000000000..881f3bcc7 --- /dev/null +++ b/youtube_dl/extractor/packtpub.py @@ -0,0 +1,138 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + clean_html, + ExtractorError, + remove_end, + strip_or_none, + unified_timestamp, + urljoin, +) + + +class PacktPubBaseIE(InfoExtractor): + _PACKT_BASE = 'https://www.packtpub.com' 
+ _MAPT_REST = '%s/mapt-rest' % _PACKT_BASE + + +class PacktPubIE(PacktPubBaseIE): + _VALID_URL = r'https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P\d+)/(?P\d+)/(?P\d+)' + + _TEST = { + 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro', + 'md5': '1e74bd6cfd45d7d07666f4684ef58f70', + 'info_dict': { + 'id': '20530', + 'ext': 'mp4', + 'title': 'Project Intro', + 'thumbnail': r're:(?i)^https?://.*\.jpg', + 'timestamp': 1490918400, + 'upload_date': '20170331', + }, + } + + def _handle_error(self, response): + if response.get('status') != 'success': + raise ExtractorError( + '% said: %s' % (self.IE_NAME, response['message']), + expected=True) + + def _download_json(self, *args, **kwargs): + response = super(PacktPubIE, self)._download_json(*args, **kwargs) + self._handle_error(response) + return response + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + course_id, chapter_id, video_id = mobj.group( + 'course_id', 'chapter_id', 'id') + + video = self._download_json( + '%s/users/me/products/%s/chapters/%s/sections/%s' + % (self._MAPT_REST, course_id, chapter_id, video_id), video_id, + 'Downloading JSON video')['data'] + + content = video.get('content') + if not content: + raise ExtractorError('This video is locked', expected=True) + + video_url = content['file'] + + metadata = self._download_json( + '%s/products/%s/chapters/%s/sections/%s/metadata' + % (self._MAPT_REST, course_id, chapter_id, video_id), + video_id)['data'] + + title = metadata['pageTitle'] + course_title = metadata.get('title') + if course_title: + title = remove_end(title, ' - %s' % course_title) + timestamp = unified_timestamp(metadata.get('publicationDate')) + thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath')) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + } + + +class PacktPubCourseIE(PacktPubBaseIE): + _VALID_URL = r'(?Phttps?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P\d+))' + _TEST = { + 'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215', + 'info_dict': { + 'id': '9781787122215', + 'title': 'Learn Nodejs by building 12 projects [Video]', + }, + 'playlist_count': 90, + } + + @classmethod + def suitable(cls, url): + return False if PacktPubIE.suitable(url) else super( + PacktPubCourseIE, cls).suitable(url) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + url, course_id = mobj.group('url', 'id') + + course = self._download_json( + '%s/products/%s/metadata' % (self._MAPT_REST, course_id), + course_id)['data'] + + entries = [] + for chapter_num, chapter in enumerate(course['tableOfContents'], 1): + if chapter.get('type') != 'chapter': + continue + children = chapter.get('children') + if not isinstance(children, list): + continue + chapter_info = { + 'chapter': chapter.get('title'), + 'chapter_number': chapter_num, + 'chapter_id': chapter.get('id'), + } + for section in children: + if section.get('type') != 'section': + continue + section_url = section.get('seoUrl') + if not isinstance(section_url, compat_str): + continue + entry = { + '_type': 'url_transparent', + 'url': urljoin(url + '/', section_url), + 'title': strip_or_none(section.get('title')), + 'description': clean_html(section.get('summary')), + 'ie_key': PacktPubIE.ie_key(), + } + entry.update(chapter_info) + entries.append(entry) + + return self.playlist_result(entries, course_id, course.get('title')) From 77c8ebe6318055cc34eaedca63f4866c4c47437a 
Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 31 Mar 2017 23:28:24 +0100 Subject: [PATCH 0563/1696] [vrv] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vrv.py | 151 +++++++++++++++++++++++++++++ 2 files changed, 152 insertions(+) create mode 100644 youtube_dl/extractor/vrv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6ad7444fe..1b427e256 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1182,6 +1182,7 @@ from .voxmedia import VoxMediaIE from .vporn import VpornIE from .vrt import VRTIE from .vrak import VrakIE +from .vrv import VRVIE from .medialaan import MedialaanIE from .vube import VubeIE from .vuclip import VuClipIE diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py new file mode 100644 index 000000000..33618c951 --- /dev/null +++ b/youtube_dl/extractor/vrv.py @@ -0,0 +1,151 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 +import json +import hashlib +import hmac +import random +import string +import time + +from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_urlencode, + compat_urlparse, +) +from ..utils import ( + float_or_none, + int_or_none, +) + + +class VRVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P[A-Z0-9]+)' + _TEST = { + 'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT', + 'info_dict': { + 'id': 'GR9PNZ396', + 'ext': 'mp4', + 'title': 'BOSTON: WHERE THE PAST IS THE PRESENT', + 'description': 'md5:4ec8844ac262ca2df9e67c0983c6b83f', + 'uploader_id': 'seeso', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + _API_DOMAIN = None + _API_PARAMS = {} + _CMS_SIGNING = {} + + def _call_api(self, path, video_id, note, data=None): + base_url = self._API_DOMAIN + '/core/' + path + encoded_query = compat_urllib_parse_urlencode({ + 'oauth_consumer_key': self._API_PARAMS['oAuthKey'], + 'oauth_nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), + 'oauth_signature_method': 'HMAC-SHA1', + 'oauth_timestamp': int(time.time()), + 'oauth_version': '1.0', + }) + headers = self.geo_verification_headers() + if data: + data = json.dumps(data).encode() + headers['Content-Type'] = 'application/json' + method = 'POST' if data else 'GET' + base_string = '&'.join([method, compat_urlparse.quote(base_url, ''), compat_urlparse.quote(encoded_query, '')]) + oauth_signature = base64.b64encode(hmac.new( + (self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'), + base_string.encode(), hashlib.sha1).digest()).decode() + encoded_query += '&oauth_signature=' + compat_urlparse.quote(oauth_signature, '') + return self._download_json( + '?'.join([base_url, encoded_query]), video_id, + note='Downloading %s JSON metadata' % note, headers=headers, data=data) + + def _call_cms(self, path, video_id, note): + return self._download_json( + self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING, + note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers()) + + def _set_api_params(self, webpage, video_id): + if not self._API_PARAMS: + self._API_PARAMS = self._parse_json(self._search_regex( + r'window\.__APP_CONFIG__\s*=\s*({.+?})', + webpage, 'api config'), video_id)['cxApiParams'] + self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co') + + def _set_cms_signing(self, video_id): + if not self._CMS_SIGNING: + self._CMS_SIGNING = 
self._call_api('index', video_id, 'CMS Signing')['cms_signing'] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + url, video_id, + headers=self.geo_verification_headers()) + media_resource = self._parse_json(self._search_regex( + r'window\.__INITIAL_STATE__\s*=\s*({.+?})', + webpage, 'inital state'), video_id).get('watch', {}).get('mediaResource') or {} + + video_data = media_resource.get('json') + if not video_data: + self._set_api_params(webpage, video_id) + episode_path = self._call_api('cms_resource', video_id, 'episode resource path', data={ + 'resource_key': 'cms:/episodes/' + video_id, + })['__links__']['cms_resource']['href'] + self._set_cms_signing(video_id) + video_data = self._call_cms(episode_path, video_id, 'video') + title = video_data['title'] + + streams_json = media_resource.get('streams', {}).get('json', {}) + if not streams_json: + self._set_api_params(webpage, video_id) + streams_path = video_data['__links__']['streams']['href'] + self._set_cms_signing(video_id) + streams_json = self._call_cms(streams_path, video_id, 'streams') + + audio_locale = streams_json.get('audio_locale') + formats = [] + for stream_id, stream in streams_json.get('streams', {}).get('adaptive_hls', {}).items(): + stream_url = stream.get('url') + if not stream_url: + continue + stream_id = stream_id or audio_locale + m3u8_formats = self._extract_m3u8_formats( + stream_url, video_id, 'mp4', m3u8_id=stream_id, + note='Downloading %s m3u8 information' % stream_id, + fatal=False) + if audio_locale: + for f in m3u8_formats: + f['language'] = audio_locale + formats.extend(m3u8_formats) + self._sort_formats(formats) + + thumbnails = [] + for thumbnail in video_data.get('images', {}).get('thumbnails', []): + thumbnail_url = thumbnail.get('source') + if not thumbnail_url: + continue + thumbnails.append({ + 'url': thumbnail_url, + 'width': int_or_none(thumbnail.get('width')), + 'height': int_or_none(thumbnail.get('height')), + }) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnails': thumbnails, + 'description': video_data.get('description'), + 'duration': float_or_none(video_data.get('duration_ms'), 1000), + 'uploader_id': video_data.get('channel_id'), + 'series': video_data.get('series_title'), + 'season': video_data.get('season_title'), + 'season_number': int_or_none(video_data.get('season_number')), + 'season_id': video_data.get('season_id'), + 'episode': title, + 'episode_number': int_or_none(video_data.get('episode_number')), + 'episode_id': video_data.get('production_episode_id'), + } From be61efdf1754d026f270f6d87446040231d56954 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 1 Apr 2017 07:26:40 +0100 Subject: [PATCH 0564/1696] [tvplay] Bypass geo restriction --- youtube_dl/extractor/tvplay.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 3eda0a399..99ff82a5d 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -225,7 +225,11 @@ class TVPlayIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - + geo_country = self._search_regex( + r'https?://[^/]+\.([a-z]{2})', url, + 'geo country', default=None) + if geo_country: + self._initialize_geo_bypass([geo_country.upper()]) video = self._download_json( 'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON') From e97fc8d6b837921ea8429727f026238b857e1b31 Mon Sep 17 00:00:00 2001 From: 
Remita Amine Date: Sat, 1 Apr 2017 07:50:24 +0100 Subject: [PATCH 0565/1696] [cwtv] extract ISM formats --- youtube_dl/extractor/cwtv.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cwtv.py b/youtube_dl/extractor/cwtv.py index 1ab9333b2..f4cf0f1c5 100644 --- a/youtube_dl/extractor/cwtv.py +++ b/youtube_dl/extractor/cwtv.py @@ -82,6 +82,11 @@ class CWTVIE(InfoExtractor): 'url': quality_url, 'tbr': tbr, }) + video_metadata = video_data['assetFields'] + ism_url = video_metadata.get('smoothStreamingUrl') + if ism_url: + formats.extend(self._extract_ism_formats( + ism_url, video_id, ism_id='mss', fatal=False)) self._sort_formats(formats) thumbnails = [{ @@ -90,8 +95,6 @@ class CWTVIE(InfoExtractor): 'height': image.get('height'), } for image_id, image in video_data['images'].items() if image.get('uri')] if video_data.get('images') else None - video_metadata = video_data['assetFields'] - subtitles = { 'en': [{ 'url': video_metadata['UnicornCcUrl'], From ca77b92f94010bdf2d44de44cb23e32075b7dcaa Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 1 Apr 2017 09:33:23 +0100 Subject: [PATCH 0566/1696] [crunchyroll] pass geo verifcation proxy --- youtube_dl/extractor/crunchyroll.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index d15fd3744..2ed8b30bb 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -390,7 +390,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text else: webpage_url = 'http://www.' + mobj.group('url') - webpage = self._download_webpage(self._add_skip_wall(webpage_url), video_id, 'Downloading webpage') + webpage = self._download_webpage( + self._add_skip_wall(webpage_url), video_id, + headers=self.geo_verification_headers()) note_m = self._html_search_regex( r'
<div class="showmedia-trailer-notice">(.+?)</div>
', webpage, 'trailer-notice', default='') @@ -565,7 +567,9 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): def _real_extract(self, url): show_id = self._match_id(url) - webpage = self._download_webpage(self._add_skip_wall(url), show_id) + webpage = self._download_webpage( + self._add_skip_wall(url), show_id, + headers=self.geo_verification_headers()) title = self._html_search_regex( r'(?s)]*>\s*(.*?)', webpage, 'title') From 2cd668ee591df4f271ed4394ba9b38262ae3c40e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 1 Apr 2017 18:55:48 +0700 Subject: [PATCH 0567/1696] [xfileshare] Improve extraction and extract hls formats --- youtube_dl/extractor/xfileshare.py | 61 +++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index e616adce3..6de5b26d7 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -6,6 +6,7 @@ import re from .common import InfoExtractor from ..utils import ( decode_packed_codes, + determine_ext, ExtractorError, int_or_none, NO_DEFAULT, @@ -95,6 +96,16 @@ class XFileShareIE(InfoExtractor): # removed by administrator 'url': 'http://xvidstage.com/amfy7atlkx25', 'only_matching': True, + }, { + 'url': 'http://vidabc.com/i8ybqscrphfv', + 'info_dict': { + 'id': 'i8ybqscrphfv', + 'ext': 'mp4', + 'title': 're:Beauty and the Beast 2017', + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): @@ -133,31 +144,45 @@ class XFileShareIE(InfoExtractor): webpage, 'title', default=None) or self._og_search_title( webpage, default=None) or video_id).strip() - def extract_video_url(default=NO_DEFAULT): - return self._search_regex( - (r'file\s*:\s*(["\'])(?Phttp.+?)\1,', - r'file_link\s*=\s*(["\'])(?Phttp.+?)\1', - r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?Phttp.+?)\2\)', - r']+src=(["\'])(?Phttp.+?)\1'), - webpage, 'file url', default=default, group='url') - - video_url = extract_video_url(default=None) - - if not video_url: + def extract_formats(default=NO_DEFAULT): + urls = [] + for regex in ( + r'file\s*:\s*(["\'])(?Phttp(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1', + r'file_link\s*=\s*(["\'])(?Phttp(?:(?!\1).)+)\1', + r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?Phttp(?:(?!\2).)+)\2\)', + r']+src=(["\'])(?Phttp(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'): + for mobj in re.finditer(regex, webpage): + video_url = mobj.group('url') + if video_url not in urls: + urls.append(video_url) + formats = [] + for video_url in urls: + if determine_ext(video_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) + else: + formats.append({ + 'url': video_url, + 'format_id': 'sd', + }) + if not formats and default is not NO_DEFAULT: + return default + self._sort_formats(formats) + return formats + + formats = extract_formats(default=None) + + if not formats: webpage = decode_packed_codes(self._search_regex( r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))", webpage, 'packed code')) - video_url = extract_video_url() + formats = extract_formats() thumbnail = self._search_regex( r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None) - formats = [{ - 'format_id': 'sd', - 'url': video_url, - 'quality': 1, - }] - return { 'id': video_id, 'title': title, From eecea00d36f29f3b22e5936ed48fa91456ab066a Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 1 Apr 2017 18:56:35 +0700 Subject: [PATCH 0568/1696] [xfileshare] Add support for vidabc.com (closes #12589) --- youtube_dl/extractor/xfileshare.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 6de5b26d7..6856fb3bf 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -27,6 +27,7 @@ class XFileShareIE(InfoExtractor): ('vidto.me', 'Vidto'), ('streamin.to', 'Streamin.To'), ('xvidstage.com', 'XVIDSTAGE'), + ('vidabc.com', 'Vid ABC'), ) IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1]) From 91399b2fcc95e72f052ee9eab8e12b68d1815c9e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 1 Apr 2017 13:32:38 +0100 Subject: [PATCH 0569/1696] [funimation] fix extraction(closes #10696)(#11773) --- youtube_dl/extractor/funimation.py | 209 ++++++++++------------------- 1 file changed, 72 insertions(+), 137 deletions(-) diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index eba00cd5a..e44a2a87f 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -7,9 +7,9 @@ from ..compat import ( compat_urllib_parse_unquote_plus, ) from ..utils import ( - clean_html, determine_ext, int_or_none, + js_to_json, sanitized_Request, ExtractorError, urlencode_postdata @@ -17,34 +17,26 @@ from ..utils import ( class FunimationIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?funimation\.com/shows/[^/]+/videos/(?:official|promotional)/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P[^/?#&]+)' _NETRC_MACHINE = 'funimation' _TESTS = [{ - 'url': 'http://www.funimation.com/shows/air/videos/official/breeze', + 'url': 'https://www.funimation.com/shows/hacksign/role-play/', 'info_dict': { - 'id': '658', - 'display_id': 'breeze', - 'ext': 'mp4', - 'title': 'Air - 1 - Breeze', - 'description': 'md5:1769f43cd5fc130ace8fd87232207892', - 'thumbnail': r're:https?://.*\.jpg', - }, - 'skip': 'Access without user interaction is forbidden by CloudFlare, and video removed', - }, { - 'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play', - 'info_dict': { - 'id': '31128', + 'id': '91144', 'display_id': 'role-play', 'ext': 'mp4', - 'title': '.hack//SIGN - 1 - Role Play', + 'title': '.hack//SIGN - Role Play', 'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd', 'thumbnail': r're:https?://.*\.jpg', }, - 'skip': 'Access without user interaction is forbidden by CloudFlare', + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { - 'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview', + 'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/', 'info_dict': { 'id': '9635', 'display_id': 'broadcast-dub-preview', @@ -54,25 +46,13 @@ class FunimationIE(InfoExtractor): 'thumbnail': r're:https?://.*\.(?:jpg|png)', }, 'skip': 'Access without user interaction is forbidden by CloudFlare', + }, { + 'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/', + 'only_matching': True, }] _LOGIN_URL = 'http://www.funimation.com/login' - def _download_webpage(self, *args, **kwargs): - try: - return super(FunimationIE, self)._download_webpage(*args, **kwargs) - except ExtractorError as ee: - if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: - response = ee.cause.read() - if b'>Please complete 
the security check to access<' in response: - raise ExtractorError( - 'Access to funimation.com is blocked by CloudFlare. ' - 'Please browse to http://www.funimation.com/, solve ' - 'the reCAPTCHA, export browser cookies to a text file,' - ' and then try again with --cookies YOUR_COOKIE_FILE.', - expected=True) - raise - def _extract_cloudflare_session_ua(self, url): ci_session_cookie = self._get_cookies(url).get('ci_session') if ci_session_cookie: @@ -114,119 +94,74 @@ class FunimationIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) - errors = [] - formats = [] - - ERRORS_MAP = { - 'ERROR_MATURE_CONTENT_LOGGED_IN': 'matureContentLoggedIn', - 'ERROR_MATURE_CONTENT_LOGGED_OUT': 'matureContentLoggedOut', - 'ERROR_SUBSCRIPTION_LOGGED_OUT': 'subscriptionLoggedOut', - 'ERROR_VIDEO_EXPIRED': 'videoExpired', - 'ERROR_TERRITORY_UNAVAILABLE': 'territoryUnavailable', - 'SVODBASIC_SUBSCRIPTION_IN_PLAYER': 'basicSubscription', - 'SVODNON_SUBSCRIPTION_IN_PLAYER': 'nonSubscription', - 'ERROR_PLAYER_NOT_RESPONDING': 'playerNotResponding', - 'ERROR_UNABLE_TO_CONNECT_TO_CDN': 'unableToConnectToCDN', - 'ERROR_STREAM_NOT_FOUND': 'streamNotFound', - } + def _search_kane(name): + return self._search_regex( + r"KANE_customdimensions\.%s\s*=\s*'([^']+)';" % name, + webpage, name, default=None) + + title_data = self._parse_json(self._search_regex( + r'TITLE_DATA\s*=\s*({[^}]+})', + webpage, 'title data', default=''), + display_id, js_to_json, fatal=False) or {} + + video_id = title_data.get('id') or self._search_regex([ + r"KANE_customdimensions.videoID\s*=\s*'(\d+)';", + r']+src="/player/(\d+)"', + ], webpage, 'video_id', default=None) + if not video_id: + player_url = self._html_search_meta([ + 'al:web:url', + 'og:video:url', + 'og:video:secure_url', + ], webpage, fatal=True) + video_id = self._search_regex(r'/player/(\d+)', player_url, 'video id') + + title = episode = title_data.get('title') or _search_kane('videoTitle') or self._og_search_title(webpage) + series = _search_kane('showName') + if series: + title = '%s - %s' % (series, title) + description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True) - USER_AGENTS = ( - # PC UA is served with m3u8 that provides some bonus lower quality formats - ('pc', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'), - # Mobile UA allows to extract direct links and also does not fail when - # PC UA fails with hulu error (e.g. 
- # http://www.funimation.com/shows/hacksign/videos/official/role-play) - ('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'), - ) - - user_agent = self._extract_cloudflare_session_ua(url) - if user_agent: - USER_AGENTS = ((None, user_agent),) - - for kind, user_agent in USER_AGENTS: - request = sanitized_Request(url) - request.add_header('User-Agent', user_agent) - webpage = self._download_webpage( - request, display_id, - 'Downloading %s webpage' % kind if kind else 'Downloading webpage') - - playlist = self._parse_json( - self._search_regex( - r'var\s+playersData\s*=\s*(\[.+?\]);\n', - webpage, 'players data'), - display_id)[0]['playlist'] - - items = next(item['items'] for item in playlist if item.get('items')) - item = next(item for item in items if item.get('itemAK') == display_id) - - error_messages = {} - video_error_messages = self._search_regex( - r'var\s+videoErrorMessages\s*=\s*({.+?});\n', - webpage, 'error messages', default=None) - if video_error_messages: - error_messages_json = self._parse_json(video_error_messages, display_id, fatal=False) - if error_messages_json: - for _, error in error_messages_json.items(): - type_ = error.get('type') - description = error.get('description') - content = error.get('content') - if type_ == 'text' and description and content: - error_message = ERRORS_MAP.get(description) - if error_message: - error_messages[error_message] = content - - for video in item.get('videoSet', []): - auth_token = video.get('authToken') - if not auth_token: - continue - funimation_id = video.get('FUNImationID') or video.get('videoId') - preference = 1 if video.get('languageMode') == 'dub' else 0 - if not auth_token.startswith('?'): - auth_token = '?%s' % auth_token - for quality, height in (('sd', 480), ('hd', 720), ('hd1080', 1080)): - format_url = video.get('%sUrl' % quality) - if not format_url: - continue - if not format_url.startswith(('http', '//')): - errors.append(format_url) - continue - if determine_ext(format_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url + auth_token, display_id, 'mp4', entry_protocol='m3u8_native', - preference=preference, m3u8_id='%s-hls' % funimation_id, fatal=False)) - else: - tbr = int_or_none(self._search_regex( - r'-(\d+)[Kk]', format_url, 'tbr', default=None)) - formats.append({ - 'url': format_url + auth_token, - 'format_id': '%s-http-%dp' % (funimation_id, height), - 'height': height, - 'tbr': tbr, - 'preference': preference, - }) - - if not formats and errors: - raise ExtractorError( - '%s returned error: %s' - % (self.IE_NAME, clean_html(error_messages.get(errors[0], errors[0]))), - expected=True) + try: + sources = self._download_json( + 'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id, + video_id)['items'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + error = self._parse_json(e.cause.read(), video_id)['errors'][0] + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, error.get('detail') or error.get('title')), expected=True) + raise + formats = [] + for source in sources: + source_url = source.get('src') + if not source_url: + continue + source_type = source.get('videoType') or determine_ext(source_url) + if source_type == 'm3u8': + formats.extend(self._extract_m3u8_formats( + source_url, video_id, 'mp4', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'format_id': source_type, + 
'url': source_url, + }) self._sort_formats(formats) - title = item['title'] - artist = item.get('artist') - if artist: - title = '%s - %s' % (artist, title) - description = self._og_search_description(webpage) or item.get('description') - thumbnail = self._og_search_thumbnail(webpage) or item.get('posterUrl') - video_id = item.get('itemId') or display_id - return { 'id': video_id, 'display_id': display_id, 'title': title, 'description': description, - 'thumbnail': thumbnail, + 'thumbnail': self._og_search_thumbnail(webpage), + 'series': series, + 'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')), + 'episode_number': int_or_none(title_data.get('episodeNum')), + 'episode': episode, + 'season_id': title_data.get('seriesId'), 'formats': formats, } From a6f3a162f35cc05ac5a34773b438dd4c5f0d164a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 1 Apr 2017 15:35:39 +0100 Subject: [PATCH 0570/1696] [limelight] improve extraction for audio only formats --- youtube_dl/extractor/limelight.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 422be2528..f52c2e169 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -62,13 +62,21 @@ class LimelightBaseIE(InfoExtractor): fmt = { 'url': stream_url, 'abr': float_or_none(stream.get('audioBitRate')), - 'vbr': float_or_none(stream.get('videoBitRate')), 'fps': float_or_none(stream.get('videoFrameRate')), - 'width': int_or_none(stream.get('videoWidthInPixels')), - 'height': int_or_none(stream.get('videoHeightInPixels')), 'ext': ext, } - rtmp = re.search(r'^(?Prtmpe?://(?P[^/]+)/(?P.+))/(?Pmp4:.+)$', stream_url) + width = int_or_none(stream.get('videoWidthInPixels')) + height = int_or_none(stream.get('videoHeightInPixels')) + vbr = float_or_none(stream.get('videoBitRate')) + if width or height or vbr: + fmt.update({ + 'width': width, + 'height': height, + 'vbr': vbr, + }) + else: + fmt['vcodec'] = 'none' + rtmp = re.search(r'^(?Prtmpe?://(?P[^/]+)/(?P.+))/(?Pmp[34]:.+)$', stream_url) if rtmp: format_id = 'rtmp' if stream.get('videoBitRate'): From 48ab554feb9c6d3e0f13e1357e04f4c89089e2d3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 1 Apr 2017 18:09:36 +0100 Subject: [PATCH 0571/1696] [vrv] add support for series pages --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/vrv.py | 88 ++++++++++++++++++++++-------- 2 files changed, 68 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 1b427e256..980333a11 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1182,7 +1182,10 @@ from .voxmedia import VoxMediaIE from .vporn import VpornIE from .vrt import VRTIE from .vrak import VrakIE -from .vrv import VRVIE +from .vrv import ( + VRVIE, + VRVSeriesIE, +) from .medialaan import MedialaanIE from .vube import VubeIE from .vuclip import VuClipIE diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py index 33618c951..487047fd7 100644 --- a/youtube_dl/extractor/vrv.py +++ b/youtube_dl/extractor/vrv.py @@ -20,22 +20,7 @@ from ..utils import ( ) -class VRVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P[A-Z0-9]+)' - _TEST = { - 'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT', - 'info_dict': { - 'id': 'GR9PNZ396', - 'ext': 'mp4', - 'title': 'BOSTON: WHERE THE PAST IS 
THE PRESENT', - 'description': 'md5:4ec8844ac262ca2df9e67c0983c6b83f', - 'uploader_id': 'seeso', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } +class VRVBaseIE(InfoExtractor): _API_DOMAIN = None _API_PARAMS = {} _CMS_SIGNING = {} @@ -64,6 +49,8 @@ class VRVIE(InfoExtractor): note='Downloading %s JSON metadata' % note, headers=headers, data=data) def _call_cms(self, path, video_id, note): + if not self._CMS_SIGNING: + self._CMS_SIGNING = self._call_api('index', video_id, 'CMS Signing')['cms_signing'] return self._download_json( self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING, note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers()) @@ -75,9 +62,30 @@ class VRVIE(InfoExtractor): webpage, 'api config'), video_id)['cxApiParams'] self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co') - def _set_cms_signing(self, video_id): - if not self._CMS_SIGNING: - self._CMS_SIGNING = self._call_api('index', video_id, 'CMS Signing')['cms_signing'] + def _get_cms_resource(self, resource_key, video_id): + return self._call_api( + 'cms_resource', video_id, 'resource path', data={ + 'resource_key': resource_key, + })['__links__']['cms_resource']['href'] + + +class VRVIE(VRVBaseIE): + IE_NAME = 'vrv' + _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P[A-Z0-9]+)' + _TEST = { + 'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT', + 'info_dict': { + 'id': 'GR9PNZ396', + 'ext': 'mp4', + 'title': 'BOSTON: WHERE THE PAST IS THE PRESENT', + 'description': 'md5:4ec8844ac262ca2df9e67c0983c6b83f', + 'uploader_id': 'seeso', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } def _real_extract(self, url): video_id = self._match_id(url) @@ -91,10 +99,8 @@ class VRVIE(InfoExtractor): video_data = media_resource.get('json') if not video_data: self._set_api_params(webpage, video_id) - episode_path = self._call_api('cms_resource', video_id, 'episode resource path', data={ - 'resource_key': 'cms:/episodes/' + video_id, - })['__links__']['cms_resource']['href'] - self._set_cms_signing(video_id) + episode_path = self._get_cms_resource( + 'cms:/episodes/' + video_id, video_id) video_data = self._call_cms(episode_path, video_id, 'video') title = video_data['title'] @@ -102,7 +108,6 @@ class VRVIE(InfoExtractor): if not streams_json: self._set_api_params(webpage, video_id) streams_path = video_data['__links__']['streams']['href'] - self._set_cms_signing(video_id) streams_json = self._call_cms(streams_path, video_id, 'streams') audio_locale = streams_json.get('audio_locale') @@ -149,3 +154,38 @@ class VRVIE(InfoExtractor): 'episode_number': int_or_none(video_data.get('episode_number')), 'episode_id': video_data.get('production_episode_id'), } + + +class VRVSeriesIE(VRVBaseIE): + IE_NAME = 'vrv:series' + _VALID_URL = r'https?://(?:www\.)?vrv\.co/series/(?P[A-Z0-9]+)' + _TEST = { + 'url': 'https://vrv.co/series/G68VXG3G6/The-Perfect-Insider', + 'info_dict': { + 'id': 'G68VXG3G6', + }, + 'playlist_mincount': 11, + } + + def _real_extract(self, url): + series_id = self._match_id(url) + webpage = self._download_webpage( + url, series_id, + headers=self.geo_verification_headers()) + + self._set_api_params(webpage, series_id) + seasons_path = self._get_cms_resource( + 'cms:/seasons?series_id=' + series_id, series_id) + seasons_data = self._call_cms(seasons_path, series_id, 'seasons') + + entries = [] + for season in seasons_data.get('items', []): + episodes_path = 
season['__links__']['season/episodes']['href'] + episodes = self._call_cms(episodes_path, series_id, 'episodes') + for episode in episodes.get('items', []): + episode_id = episode['id'] + entries.append(self.url_result( + 'https://vrv.co/watch/' + episode_id, + 'VRV', episode_id, episode.get('title'))) + + return self.playlist_result(entries, series_id) From 51342717cddafde83dbf39f2212be40a196a577a Mon Sep 17 00:00:00 2001 From: Timendum Date: Tue, 14 Mar 2017 16:11:09 +0100 Subject: [PATCH 0572/1696] [rai] Fix extraction --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/rai.py | 355 ++++++++++++++++------------- 2 files changed, 195 insertions(+), 162 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 980333a11..d9e8d53ac 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -802,7 +802,7 @@ from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE from .rai import ( - RaiTVIE, + RaiPlayIE, RaiIE, ) from .rbmaradio import RBMARadioIE diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 41afbd9af..b67e94f88 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -3,8 +3,8 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( - determine_ext, ExtractorError, + determine_ext, find_xpath_attr, fix_xml_ampersands, int_or_none, @@ -55,181 +55,157 @@ class RaiBaseIE(InfoExtractor): return formats - def _extract_from_content_id(self, content_id, base_url): - media = self._download_json( - 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id, - content_id, 'Downloading video JSON') - thumbnails = [] - for image_type in ('image', 'image_medium', 'image_300'): - thumbnail_url = media.get(image_type) - if thumbnail_url: - thumbnails.append({ - 'url': compat_urlparse.urljoin(base_url, thumbnail_url), - }) +class RaiPlayIE(RaiBaseIE): + _VALID_URL = r'https?://(?:www\.)?raiplay\.it/.+?-(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})\.html' + _TESTS = [{ + 'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter', + 'md5': '340aa3b7afb54bfd14a8c11786450d76', + 'info_dict': { + 'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66', + 'ext': 'mp4', + 'title': 'La Casa Bianca', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': r're:^Rai.+', + 'description': 're:^[A-Za-z]+' + } + }, { + 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?', + 'md5': 'ed4da3d70ccf8129a33ab16b34d20ab8', + 'info_dict': { + 'id': 'efebe701-969c-4593-92f3-285f0d1ce750', + 'ext': 'mp4', + 'title': 'Gazebo - #gazebotraindesi', + 'thumbnail': r're:^https?://.*\.png$', + 'uploader': r're:^Rai.+', + 'description': r're:^[A-Za-z]+' + } + }, { + 'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', + 'md5': '8970abf8caf8aef4696e7b1f2adfc696', + 'info_dict': { + 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', + 'ext': 'mp4', + 'title': 'Report - Report del 07/04/2014', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': r're:^Rai.+', + 'description': r're:^[A-Za-z]+' + } + }] + _RESOLUTION = '600x400' - formats = [] - media_type = media['type'] - if 'Audio' in media_type: - formats.append({ - 'format_id': media.get('formatoAudio'), - 'url': 
media['audioUrl'], - 'ext': media.get('formatoAudio'), - }) - elif 'Video' in media_type: - formats.extend(self._extract_relinker_formats(media['mediaUri'], content_id)) - self._sort_formats(formats) - else: - raise ExtractorError('not a media file') + def _real_extract(self, url): + video_id = self._match_id(url) - subtitles = {} - captions = media.get('subtitlesUrl') - if captions: - STL_EXT = '.stl' - SRT_EXT = '.srt' - if captions.endswith(STL_EXT): - captions = captions[:-len(STL_EXT)] + SRT_EXT - subtitles['it'] = [{ - 'ext': 'srt', - 'url': captions, - }] + # remove query and fragment part from url + canonical_url = compat_urlparse.urljoin(url, compat_urlparse.urlparse(url).path) + webpage = self._download_webpage(canonical_url, video_id) - return { - 'id': content_id, - 'title': media['name'], - 'description': media.get('desc'), - 'thumbnails': thumbnails, - 'uploader': media.get('author'), - 'upload_date': unified_strdate(media.get('date')), - 'duration': parse_duration(media.get('length')), - 'formats': formats, - 'subtitles': subtitles, - } + media = self._download_json('%s?json' % canonical_url, + video_id, 'Downloading video JSON') + thumbnails = [] + if 'images' in media: + for _, value in media.get('images').items(): + if value: + thumbnails.append({ + 'url': value.replace('[RESOLUTION]', self._RESOLUTION) + }) -class RaiTVIE(RaiBaseIE): - _VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+(?:media|ondemand)/.+?-(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html' - _TESTS = [ - { - 'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html', - 'md5': '8970abf8caf8aef4696e7b1f2adfc696', - 'info_dict': { - 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', - 'ext': 'mp4', - 'title': 'Report del 07/04/2014', - 'description': 'md5:f27c544694cacb46a078db84ec35d2d9', - 'upload_date': '20140407', - 'duration': 6160, - 'thumbnail': r're:^https?://.*\.jpg$', - } - }, - { - # no m3u8 stream - 'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', - # HDS download, MD5 is unstable - 'info_dict': { - 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', - 'ext': 'flv', - 'title': 'TG PRIMO TEMPO', - 'upload_date': '20140612', - 'duration': 1758, - 'thumbnail': r're:^https?://.*\.jpg$', - }, - 'skip': 'Geo-restricted to Italy', - }, - { - 'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html', - 'md5': '35cf7c229f22eeef43e48b5cf923bef0', - 'info_dict': { - 'id': '7aafdea9-0e5d-49d5-88a6-7e65da67ae13', - 'ext': 'mp4', - 'title': 'State of the Net, Antonella La Carpia: regole virali', - 'description': 'md5:b0ba04a324126903e3da7763272ae63c', - 'upload_date': '20140613', - }, - 'skip': 'Error 404', - }, - { - 'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html', - 'info_dict': { - 'id': 'b4a49761-e0cc-4b14-8736-2729f6f73132', - 'ext': 'mp4', - 'title': 'Alluvione in Sardegna e dissesto idrogeologico', - 'description': 'Edizione delle ore 20:30 ', - }, - 'skip': 'invalid urls', - }, - { - 'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html', - 'md5': 'e57493e1cb8bc7c564663f363b171847', - 'info_dict': { - 'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6', - 'ext': 'mp4', - 'title': 'Il Candidato - Primo episodio: "Le Primarie"', - 
'description': 'md5:364b604f7db50594678f483353164fb8', - 'upload_date': '20140923', - 'duration': 386, - 'thumbnail': r're:^https?://.*\.jpg$', - } - }, - ] + if 'video' not in media: + raise ExtractorError('No video found') - def _real_extract(self, url): - video_id = self._match_id(url) + video = media.get('video') + duration = parse_duration(video.get('duration')), + formats = self._extract_relinker_formats(video.get('contentUrl'), video_id) + self._sort_formats(formats) - return self._extract_from_content_id(video_id, url) + return { + 'id': video_id, + 'title': self._og_search_title(webpage).replace(' - video - RaiPlay', ''), + 'description': self._og_search_description(webpage), + 'uploader': media.get('channel'), + 'duration': duration, + 'thumbnails': thumbnails, + 'formats': formats + } class RaiIE(RaiBaseIE): - _VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html' - _TESTS = [ - { - 'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html', - 'md5': '2dd727e61114e1ee9c47f0da6914e178', - 'info_dict': { - 'id': '59d69d28-6bb6-409d-a4b5-ed44096560af', - 'ext': 'mp4', - 'title': 'Il pacco', - 'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a', - 'upload_date': '20141221', - }, + _VALID_URL = r'https?://.+\.(?:rai|rainews)\.it/dl/.+?-(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html' + _TESTS = [{ + # subdomain test case + 'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', + 'info_dict': { + 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', + 'ext': 'mp4', + 'title': 'TG PRIMO TEMPO', + 'upload_date': '20140612', + 'duration': 1758, + 'thumbnail': r're:^https?://.*\.jpg$' + } + }, { + # rainews test case + 'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html', + 'info_dict': { + 'id': '1632c009-c843-4836-bb65-80c33084a64b', + 'ext': 'mp4', + 'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor \"La ragazza del treno\" ', + 'upload_date': '20161103', + 'thumbnail': r're:^https?://.*\.png$', + 'description': r're:^[A-Za-z]+' + } + }, { + # with media information + 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html', + 'md5': '11959b4e44fa74de47011b5799490adf', + 'info_dict': { + 'id': 'efb17665-691c-45d5-a60c-5301333cbb0c', + 'ext': 'mp4', + 'title': 'TG1 ore 20:00 del 03/11/2016', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20161103', + 'description': r're:^[A-Za-z]+' + } + }, { + # drawMediaRaiTV test case + 'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html', + 'md5': '2dd727e61114e1ee9c47f0da6914e178', + 'info_dict': { + 'id': '59d69d28-6bb6-409d-a4b5-ed44096560af', + 'ext': 'mp4', + 'title': 'Il pacco', + 'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a', + 'upload_date': '20141221', }, - { - # Direct relinker URL - 'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews', - # HDS live stream, MD5 is unstable - 'info_dict': { - 'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc', - 'ext': 'flv', - 'title': 'EuroNews', - }, - 'skip': 'Geo-restricted to Italy', + }, { + # Direct relinker URL + 'url': 
'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews', + # HDS live stream, MD5 is unstable + 'info_dict': { + 'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc', + 'ext': 'flv', + 'title': 'EuroNews', }, - { - # Embedded content item ID - 'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined', - 'md5': '84c1135ce960e8822ae63cec34441d63', - 'info_dict': { - 'id': '0960e765-62c8-474a-ac4b-7eb3e2be39c8', - 'ext': 'mp4', - 'title': 'TG1 ore 20:00 del 02/07/2016', - 'upload_date': '20160702', - }, + }, { + # Embedded content item ID + 'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined', + 'info_dict': { + 'id': 'd80d4b70-3812-4501-a888-92edec729f00', + 'ext': 'mp4', + 'title': r're:TG1 ore \d{2}:\d{2} del \d{2}/\d{2}/\d{4}', + 'upload_date': r're:\d{8}', + 'description': r're:.+', }, - { - 'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html', - # HDS live stream, MD5 is unstable - 'info_dict': { - 'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9', - 'ext': 'flv', - 'title': 'La diretta di Rainews24', - }, + }, { + 'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html', + # HDS live stream, MD5 is unstable + 'info_dict': { + 'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9', + 'ext': 'mp4', + 'title': 'La diretta di Rainews24', }, - ] - - @classmethod - def suitable(cls, url): - return False if RaiTVIE.suitable(url) else super(RaiIE, cls).suitable(url) + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -250,6 +226,12 @@ class RaiIE(RaiBaseIE): if content_item_id: return self._extract_from_content_id(content_item_id, url) + try: + return self._extract_from_content_id(video_id, url) + except ExtractorError: + # no media data, only direct relinker + pass + relinker_url = compat_urlparse.urljoin(url, self._search_regex( r'(?:var\s+videoURL|mediaInfo\.mediaUri)\s*=\s*(?P[\'"])(?P(https?:)?//mediapolis\.rai\.it/relinker/relinkerServlet\.htm\?cont=\d+)(?P=q1)', webpage, 'relinker URL', group='url')) @@ -265,3 +247,54 @@ class RaiIE(RaiBaseIE): 'title': title, 'formats': formats, } + + def _extract_from_content_id(self, content_id, url): + media = self._download_json( + 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id, + content_id, 'Downloading video JSON') + + thumbnails = [] + for image_type in ('image', 'image_medium', 'image_300'): + thumbnail_url = media.get(image_type) + if thumbnail_url: + thumbnails.append({ + 'url': compat_urlparse.urljoin(url, thumbnail_url), + }) + + formats = [] + media_type = media['type'] + if 'Audio' in media_type: + formats.append({ + 'format_id': media.get('formatoAudio'), + 'url': media['audioUrl'], + 'ext': media.get('formatoAudio'), + }) + elif 'Video' in media_type: + formats.extend(self._extract_relinker_formats(media['mediaUri'], content_id)) + self._sort_formats(formats) + else: + raise ExtractorError('not a media file') + + subtitles = {} + captions = media.get('subtitlesUrl') + if captions: + STL_EXT = '.stl' + SRT_EXT = '.srt' + if captions.endswith(STL_EXT): + captions = captions[:-len(STL_EXT)] + SRT_EXT + subtitles['it'] = [{ + 'ext': 'srt', + 'url': captions, + }] + + return { + 'id': content_id, + 'title': media['name'], + 'description': media.get('desc'), + 'thumbnails': thumbnails, + 'uploader': media.get('author'), + 
'upload_date': unified_strdate(media.get('date')), + 'duration': parse_duration(media.get('length')), + 'formats': formats, + 'subtitles': subtitles, + } From b8d8cced9b55c57f3b09e83972be9d6318a459ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Apr 2017 02:14:42 +0700 Subject: [PATCH 0573/1696] [rai] Improve extraction (closes #11790) * Fix georestriction detection * Detect live streams + Extract relinker metadata * Improve ContentItem detection + Extract series metadata * Fix tests --- youtube_dl/extractor/rai.py | 359 +++++++++++++++++++++++------------- 1 file changed, 233 insertions(+), 126 deletions(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index b67e94f88..b77b0a08e 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -1,23 +1,40 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_urlparse, + compat_str, +) from ..utils import ( ExtractorError, determine_ext, find_xpath_attr, fix_xml_ampersands, + GeoRestrictedError, int_or_none, parse_duration, + strip_or_none, + try_get, unified_strdate, + unified_timestamp, update_url_query, + urljoin, xpath_text, ) class RaiBaseIE(InfoExtractor): - def _extract_relinker_formats(self, relinker_url, video_id): + _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' + _GEO_COUNTRIES = ['IT'] + _GEO_BYPASS = False + + def _extract_relinker_info(self, relinker_url, video_id): formats = [] + geoprotection = None + is_live = None + duration = None for platform in ('mon', 'flash', 'native'): relinker = self._download_xml( @@ -27,9 +44,27 @@ class RaiBaseIE(InfoExtractor): query={'output': 45, 'pl': platform}, headers=self.geo_verification_headers()) - media_url = find_xpath_attr(relinker, './url', 'type', 'content').text + if not geoprotection: + geoprotection = xpath_text( + relinker, './geoprotection', default=None) == 'Y' + + if not is_live: + is_live = xpath_text( + relinker, './is_live', default=None) == 'Y' + if not duration: + duration = parse_duration(xpath_text( + relinker, './duration', default=None)) + + url_elem = find_xpath_attr(relinker, './url', 'type', 'content') + if url_elem is None: + continue + + media_url = url_elem.text + + # This does not imply geo restriction (e.g. 
+ # http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html) if media_url == 'http://download.rai.it/video_no_available.mp4': - self.raise_geo_restricted() + continue ext = determine_ext(media_url) if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'): @@ -53,11 +88,18 @@ class RaiBaseIE(InfoExtractor): 'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http', }) - return formats + if not formats and geoprotection is True: + self.raise_geo_restricted(countries=self._GEO_COUNTRIES) + + return dict((k, v) for k, v in { + 'is_live': is_live, + 'duration': duration, + 'formats': formats, + }.items() if v is not None) class RaiPlayIE(RaiBaseIE): - _VALID_URL = r'https?://(?:www\.)?raiplay\.it/.+?-(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})\.html' + _VALID_URL = r'(?Phttps?://(?:www\.)?raiplay\.it/.+?-(?P%s)\.html)' % RaiBaseIE._UUID_RE _TESTS = [{ 'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter', 'md5': '340aa3b7afb54bfd14a8c11786450d76', @@ -65,110 +107,130 @@ class RaiPlayIE(RaiBaseIE): 'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66', 'ext': 'mp4', 'title': 'La Casa Bianca', + 'alt_title': 'S2016 - Puntata del 23/10/2016', + 'description': 'md5:a09d45890850458077d1f68bb036e0a5', 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': r're:^Rai.+', - 'description': 're:^[A-Za-z]+' - } - }, { - 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?', - 'md5': 'ed4da3d70ccf8129a33ab16b34d20ab8', - 'info_dict': { - 'id': 'efebe701-969c-4593-92f3-285f0d1ce750', - 'ext': 'mp4', - 'title': 'Gazebo - #gazebotraindesi', - 'thumbnail': r're:^https?://.*\.png$', - 'uploader': r're:^Rai.+', - 'description': r're:^[A-Za-z]+' - } + 'uploader': 'Rai 3', + 'creator': 'Rai 3', + 'duration': 3278, + 'timestamp': 1477764300, + 'upload_date': '20161029', + 'series': 'La Casa Bianca', + 'season': '2016', + }, }, { 'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', 'md5': '8970abf8caf8aef4696e7b1f2adfc696', 'info_dict': { 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', 'ext': 'mp4', - 'title': 'Report - Report del 07/04/2014', + 'title': 'Report del 07/04/2014', + 'alt_title': 'S2013/14 - Puntata del 07/04/2014', + 'description': 'md5:f27c544694cacb46a078db84ec35d2d9', 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': r're:^Rai.+', - 'description': r're:^[A-Za-z]+' - } + 'uploader': 'Rai 5', + 'creator': 'Rai 5', + 'duration': 6160, + 'series': 'Report', + 'season_number': 5, + 'season': '2013/14', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?', + 'only_matching': True, }] - _RESOLUTION = '600x400' def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + url, video_id = mobj.group('url', 'id') - # remove query and fragment part from url - canonical_url = compat_urlparse.urljoin(url, compat_urlparse.urlparse(url).path) - webpage = self._download_webpage(canonical_url, video_id) + media = self._download_json( + '%s?json' % url, video_id, 'Downloading video JSON') - media = self._download_json('%s?json' % canonical_url, - video_id, 'Downloading video JSON') + title = media['name'] + + video = media['video'] + + relinker_info = self._extract_relinker_info(video['contentUrl'], video_id) + 
self._sort_formats(relinker_info['formats']) thumbnails = [] if 'images' in media: for _, value in media.get('images').items(): if value: thumbnails.append({ - 'url': value.replace('[RESOLUTION]', self._RESOLUTION) + 'url': value.replace('[RESOLUTION]', '600x400') }) - if 'video' not in media: - raise ExtractorError('No video found') + timestamp = unified_timestamp(try_get( + media, lambda x: x['availabilities'][0]['start'], compat_str)) - video = media.get('video') - duration = parse_duration(video.get('duration')), - formats = self._extract_relinker_formats(video.get('contentUrl'), video_id) - self._sort_formats(formats) - - return { + info = { 'id': video_id, - 'title': self._og_search_title(webpage).replace(' - video - RaiPlay', ''), - 'description': self._og_search_description(webpage), + 'title': title, + 'alt_title': media.get('subtitle'), + 'description': media.get('description'), 'uploader': media.get('channel'), - 'duration': duration, + 'creator': media.get('editor'), + 'duration': parse_duration(video.get('duration')), + 'timestamp': timestamp, 'thumbnails': thumbnails, - 'formats': formats + 'series': try_get( + media, lambda x: x['isPartOf']['name'], compat_str), + 'season_number': int_or_none(try_get( + media, lambda x: x['isPartOf']['numeroStagioni'])), + 'season': media.get('stagione') or None, } + info.update(relinker_info) + + return info + class RaiIE(RaiBaseIE): - _VALID_URL = r'https?://.+\.(?:rai|rainews)\.it/dl/.+?-(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html' + _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE _TESTS = [{ - # subdomain test case + # var uniquename = "ContentItem-..." + # data-id="ContentItem-..." 'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', 'info_dict': { 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', 'ext': 'mp4', 'title': 'TG PRIMO TEMPO', - 'upload_date': '20140612', + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 1758, - 'thumbnail': r're:^https?://.*\.jpg$' + 'upload_date': '20140612', } }, { - # rainews test case + # with ContentItem in many metas 'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html', 'info_dict': { 'id': '1632c009-c843-4836-bb65-80c33084a64b', 'ext': 'mp4', - 'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor \"La ragazza del treno\" ', - 'upload_date': '20161103', + 'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"', + 'description': 'I film in uscita questa settimana.', 'thumbnail': r're:^https?://.*\.png$', - 'description': r're:^[A-Za-z]+' + 'duration': 833, + 'upload_date': '20161103', } }, { - # with media information + # with ContentItem in og:url 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html', 'md5': '11959b4e44fa74de47011b5799490adf', 'info_dict': { 'id': 'efb17665-691c-45d5-a60c-5301333cbb0c', 'ext': 'mp4', 'title': 'TG1 ore 20:00 del 03/11/2016', + 'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016', 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 2214, 'upload_date': '20161103', - 'description': r're:^[A-Za-z]+' } }, { - # drawMediaRaiTV test case + # drawMediaRaiTV(...) 
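# Editor's aside (not part of this patch): this test case covers pages that embed the
# ContentItem UUID in an inline drawMediaRaiTV(...) call; the ContentItem-detection
# regex introduced further below in this patch is meant to capture exactly that form.
# A minimal, runnable sketch of the idea follows — the sample markup and variable
# names are illustrative assumptions, not the extractor's actual code:
import re

_UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
sample_page = (
    '<script>drawMediaRaiTV("ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e",'
    ' {"width": 640});</script>'
)
mobj = re.search(r'drawMediaRaiTV\((["\'])ContentItem-(?P<id>%s)' % _UUID_RE, sample_page)
content_item_id = mobj.group('id') if mobj else None
print(content_item_id)  # 0c7a664b-d0f4-4b2c-8835-3f82e46f433e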
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html', 'md5': '2dd727e61114e1ee9c47f0da6914e178', 'info_dict': { @@ -176,83 +238,67 @@ class RaiIE(RaiBaseIE): 'ext': 'mp4', 'title': 'Il pacco', 'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a', + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20141221', }, }, { - # Direct relinker URL + # initEdizione('ContentItem-...' + 'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined', + 'info_dict': { + 'id': 'c2187016-8484-4e3a-8ac8-35e475b07303', + 'ext': 'mp4', + 'title': r're:TG1 ore \d{2}:\d{2} del \d{2}/\d{2}/\d{4}', + 'duration': 2274, + 'upload_date': '20170401', + }, + 'skip': 'Changes daily', + }, { + # HDS live stream with only relinker URL 'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews', - # HDS live stream, MD5 is unstable 'info_dict': { 'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc', 'ext': 'flv', 'title': 'EuroNews', }, - }, { - # Embedded content item ID - 'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined', - 'info_dict': { - 'id': 'd80d4b70-3812-4501-a888-92edec729f00', - 'ext': 'mp4', - 'title': r're:TG1 ore \d{2}:\d{2} del \d{2}/\d{2}/\d{4}', - 'upload_date': r're:\d{8}', - 'description': r're:.+', + 'params': { + 'skip_download': True, }, }, { + # HLS live stream with ContentItem in og:url 'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html', - # HDS live stream, MD5 is unstable 'info_dict': { 'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9', 'ext': 'mp4', 'title': 'La diretta di Rainews24', }, + 'params': { + 'skip_download': True, + }, }] - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - iframe_url = self._search_regex( - [r']+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"', - r'drawMediaRaiTV\(["\'](.+?)["\']'], - webpage, 'iframe', default=None) - if iframe_url: - if not iframe_url.startswith('http'): - iframe_url = compat_urlparse.urljoin(url, iframe_url) - return self.url_result(iframe_url) - - content_item_id = self._search_regex( - r'initEdizione\((?P[\'"])ContentItem-(?P[^\'"]+)(?P=q1)', - webpage, 'content item ID', group='content_id', default=None) - if content_item_id: - return self._extract_from_content_id(content_item_id, url) - - try: - return self._extract_from_content_id(video_id, url) - except ExtractorError: - # no media data, only direct relinker - pass - - relinker_url = compat_urlparse.urljoin(url, self._search_regex( - r'(?:var\s+videoURL|mediaInfo\.mediaUri)\s*=\s*(?P[\'"])(?P(https?:)?//mediapolis\.rai\.it/relinker/relinkerServlet\.htm\?cont=\d+)(?P=q1)', - webpage, 'relinker URL', group='url')) - formats = self._extract_relinker_formats(relinker_url, video_id) - self._sort_formats(formats) - - title = self._search_regex( - r'var\s+videoTitolo\s*=\s*([\'"])(?P[^\'"]+)\1', - webpage, 'title', group='title', default=None) or self._og_search_title(webpage) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - } - def _extract_from_content_id(self, content_id, url): media = self._download_json( 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id, content_id, 'Downloading video JSON') + title = media['name'].strip() + + media_type = media['type'] + if 'Audio' in media_type: + 
relinker_info = { + 'formats': { + 'format_id': media.get('formatoAudio'), + 'url': media['audioUrl'], + 'ext': media.get('formatoAudio'), + } + } + elif 'Video' in media_type: + relinker_info = self._extract_relinker_info(media['mediaUri'], content_id) + else: + raise ExtractorError('not a media file') + + self._sort_formats(relinker_info['formats']) + thumbnails = [] for image_type in ('image', 'image_medium', 'image_300'): thumbnail_url = media.get(image_type) @@ -261,20 +307,6 @@ class RaiIE(RaiBaseIE): 'url': compat_urlparse.urljoin(url, thumbnail_url), }) - formats = [] - media_type = media['type'] - if 'Audio' in media_type: - formats.append({ - 'format_id': media.get('formatoAudio'), - 'url': media['audioUrl'], - 'ext': media.get('formatoAudio'), - }) - elif 'Video' in media_type: - formats.extend(self._extract_relinker_formats(media['mediaUri'], content_id)) - self._sort_formats(formats) - else: - raise ExtractorError('not a media file') - subtitles = {} captions = media.get('subtitlesUrl') if captions: @@ -287,14 +319,89 @@ class RaiIE(RaiBaseIE): 'url': captions, }] - return { + info = { 'id': content_id, - 'title': media['name'], - 'description': media.get('desc'), + 'title': title, + 'description': strip_or_none(media.get('desc')), 'thumbnails': thumbnails, 'uploader': media.get('author'), 'upload_date': unified_strdate(media.get('date')), 'duration': parse_duration(media.get('length')), - 'formats': formats, 'subtitles': subtitles, } + + info.update(relinker_info) + + return info + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + content_item_id = None + + content_item_url = self._html_search_meta( + ('og:url', 'og:video', 'og:video:secure_url', 'twitter:url', + 'twitter:player', 'jsonlink'), webpage, default=None) + if content_item_url: + content_item_id = self._search_regex( + r'ContentItem-(%s)' % self._UUID_RE, content_item_url, + 'content item id', default=None) + + if not content_item_id: + content_item_id = self._search_regex( + r'''(?x) + (?: + (?:initEdizione|drawMediaRaiTV)\(| + <(?:[^>]+\bdata-id|var\s+uniquename)= + ) + (["\']) + (?:(?!\1).)*\bContentItem-(?P<id>%s) + ''' % self._UUID_RE, + webpage, 'content item id', default=None, group='id') + + content_item_ids = set() + content_item_ids.add(content_item_id) + if video_id not in content_item_ids: + content_item_ids.add(video_id) + + for content_item_id in content_item_ids: + try: + return self._extract_from_content_id(content_item_id, url) + except GeoRestrictedError: + raise + except ExtractorError: + pass + + relinker_url = self._search_regex( + r'''(?x) + (?: + var\s+videoURL| + mediaInfo\.mediaUri + )\s*=\s* + ([\'"]) + (?P<url> + (?:https?:)? + //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\? 
+ (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1 + ''', + webpage, 'relinker URL', group='url') + + relinker_info = self._extract_relinker_info( + urljoin(url, relinker_url), video_id) + self._sort_formats(relinker_info['formats']) + + title = self._search_regex( + r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1', + webpage, 'title', group='title', + default=None) or self._og_search_title(webpage) + + info = { + 'id': video_id, + 'title': title, + } + + info.update(relinker_info) + + return info From 361f293ab85c29ab62cb91577d2be34814d5c552 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 2 Apr 2017 02:24:13 +0700 Subject: [PATCH 0574/1696] [rai] Skip not found content item id --- youtube_dl/extractor/rai.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index b77b0a08e..077546a73 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -362,7 +362,8 @@ class RaiIE(RaiBaseIE): webpage, 'content item id', default=None, group='id') content_item_ids = set() - content_item_ids.add(content_item_id) + if content_item_id: + content_item_ids.add(content_item_id) if video_id not in content_item_ids: content_item_ids.add(video_id) From a76c25146a93052f367a0fb8cdd9a08ba9cef491 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 2 Apr 2017 02:37:18 +0700 Subject: [PATCH 0575/1696] [ChangeLog] Actualize --- ChangeLog | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/ChangeLog b/ChangeLog index 07725b12a..3ffc647f1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,27 @@ +version <unreleased> + +Core +[YoutubeDL] Return early when extraction of url_transparent fails + +Extractors +* [rai] Fix and improve extraction (#11790) ++ [vrv] Add support for series pages +* [limelight] Improve extraction for audio only formats +* [funimation] Fix extraction (#10696, #11773) ++ [xfileshare] Add support for vidabc.com (#12589) ++ [xfileshare] Improve extraction and extract hls formats ++ [crunchyroll] Pass geo verifcation proxy ++ [cwtv] Extract ISM formats ++ [tvplay] Bypass geo restriction ++ [vrv] Add support for vrv.co ++ [packtpub] Add support for packtpub.com (#12610) ++ [generic] Pass base_url to _parse_jwplayer_data ++ [adn] Add support for animedigitalnetwork.fr (#4866) ++ [allocine] Extract more metadata +* [allocine] Fix extraction (#12592) +* [openload] Fix extraction + + version 2017.03.26 Core From b56e41a701d73072b7d62a151b7aafd87955dfe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 2 Apr 2017 02:39:15 +0700 Subject: [PATCH 0576/1696] release 2017.04.02 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 8 ++++---- docs/supportedsites.md | 9 +++++++-- youtube_dl/version.py | 2 +- 5 files changed, 16 insertions(+), 11 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 2f717926c..c1b737619 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.26*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.26** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.02** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.03.26 +[debug] youtube-dl version 2017.04.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 3ffc647f1..0199bdf1f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2017.04.02 Core [YoutubeDL] Return early when extraction of url_transparent fails diff --git a/README.md b/README.md index 86b44781c..41f647aaa 100644 --- a/README.md +++ b/README.md @@ -181,10 +181,10 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo -R, --retries RETRIES Number of retries (default is 10), or "infinite". --fragment-retries RETRIES Number of retries for a fragment (default - is 10), or "infinite" (DASH and hlsnative - only) - --skip-unavailable-fragments Skip unavailable fragments (DASH and - hlsnative only) + is 10), or "infinite" (DASH, hlsnative and + ISM) + --skip-unavailable-fragments Skip unavailable fragments (DASH, hlsnative + and ISM) --abort-on-unavailable-fragment Abort downloading when some fragment is not available --buffer-size SIZE Size of download buffer (e.g. 
1024 or 16K) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e9dbc021b..5c1855111 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -28,6 +28,7 @@ - **acast** - **acast:channel** - **AddAnime** + - **ADN**: Anime Digital Network - **AdobeTV** - **AdobeTVChannel** - **AdobeTVShow** @@ -572,6 +573,8 @@ - **orf:iptv**: iptv.ORF.at - **orf:oe1**: Radio Österreich 1 - **orf:tvthek**: ORF TVthek + - **PacktPub** + - **PacktPubCourse** - **PandaTV**: 熊猫TV - **pandora.tv**: 판도라TV - **parliamentlive.tv**: UK parliament videos @@ -629,7 +632,7 @@ - **radiofrance** - **RadioJavan** - **Rai** - - **RaiTV** + - **RaiPlay** - **RBMARadio** - **RDS**: RDS.ca - **RedBullTV** @@ -926,6 +929,8 @@ - **vpro**: npo.nl and ntr.nl - **Vrak** - **VRT** + - **vrv** + - **vrv:series** - **vube**: Vube.com - **VuClip** - **VVVVID** @@ -953,7 +958,7 @@ - **WSJ**: Wall Street Journal - **XBef** - **XboxClips** - - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE + - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC - **XHamster** - **XHamsterEmbed** - **xiami:album**: 虾米音乐 - 专辑 diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 94e8198ec..f612d03ca 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.03.26' +__version__ = '2017.04.02' From b3633fa0ce0f98801582f8e4e348436b0f361eb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 2 Apr 2017 03:20:28 +0700 Subject: [PATCH 0577/1696] [pericope] Add support for pscp.tv URLs --- youtube_dl/extractor/periscope.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 0e3623024..1add6b840 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -20,7 +20,7 @@ class PeriscopeBaseIE(InfoExtractor): class PeriscopeIE(PeriscopeBaseIE): IE_DESC = 'Periscope' IE_NAME = 'periscope' - _VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)' # Alive example URLs can be found here http://onperiscope.com/ _TESTS = [{ 'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==', @@ -41,6 +41,9 @@ class PeriscopeIE(PeriscopeBaseIE): }, { 'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX', 'only_matching': True, + }, { + 'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv', + 'only_matching': True, }] @staticmethod @@ -103,7 +106,7 @@ class PeriscopeIE(PeriscopeBaseIE): class PeriscopeUserIE(PeriscopeBaseIE): - _VALID_URL = r'https?://(?:www\.)?periscope\.tv/(?P<id>[^/]+)/?$' + _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/(?P<id>[^/]+)/?$' IE_DESC = 'Periscope user videos' IE_NAME = 'periscope:user' From 4457823dda410c5406f5ab5474b9b1f9325fa7ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 2 Apr 2017 03:56:49 +0700 Subject: [PATCH 0578/1696] [extractor/common] Move censorship checks to a separate method and add check for just another ISP --- youtube_dl/extractor/common.py | 48 ++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git 
a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6c3c095f7..cdfa7000b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -547,6 +547,34 @@ class InfoExtractor(object): return encoding + def __check_blocked(self, content): + first_block = content[:512] + if ('<title>Access to this site is blocked' in content and + 'Websense' in first_block): + msg = 'Access to this webpage has been blocked by Websense filtering software in your network.' + blocked_iframe = self._html_search_regex( + r'' + PLAYER_REGEX = r'