From a28ccbabc60c81016c851ae46365be377ea83795 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 31 Mar 2015 02:21:27 +0800 Subject: [PATCH 1/6] [Yahoo/NBCSports] Fix #5226 --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/nbc.py | 21 +++++++++++++++++++++ youtube_dl/extractor/yahoo.py | 14 ++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 43bac0252..5d0d2a9bc 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -310,6 +310,7 @@ from .naver import NaverIE from .nba import NBAIE from .nbc import ( NBCIE, + NBCSportsIE, NBCNewsIE, ) from .ndr import NDRIE diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 80a01c778..033bf71f0 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -50,6 +50,27 @@ class NBCIE(InfoExtractor): return self.url_result(theplatform_url) +class NBCSportsIE(InfoExtractor): + _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)' + + _TEST = { + 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI', + 'md5': 'ceae8dced5c14a1c1ffcb7a32194cca5', + 'info_dict': { + 'id': '9CsDKds0kvHI', + 'ext': 'flv', + 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', + 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + theplatform_url = self._og_search_video_url(webpage) + return self.url_result(theplatform_url, 'ThePlatform') + + class NBCNewsIE(InfoExtractor): _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/ (?:video/.+?/(?P<id>\d+)| diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 97dbac4cc..6e72f1e55 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -129,6 +129,15 @@ class YahooIE(InfoExtractor): }, { 'url': 
'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html', 'only_matching': True, + }, { + 'note': 'NBC Sports embeds', + 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313', + 'info_dict': { + 'id': '9CsDKds0kvHI', + 'ext': 'flv', + 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', + 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', + } } ] @@ -151,6 +160,11 @@ class YahooIE(InfoExtractor): items = json.loads(items_json) video_id = items[0]['id'] return self._get_info(video_id, display_id, webpage) + # Look for NBCSports iframes + iframe_m = re.search( + r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage) + if iframe_m: + return self.url_result(iframe_m.group('url'), 'NBCSports') items_json = self._search_regex( r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, From a2a4d5fa313d5244d24fa70d5db91971a7583d79 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 31 Mar 2015 02:47:18 +0800 Subject: [PATCH 2/6] [Yahoo/NBCSports] Generalize NBC sports info extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/nbc.py | 31 ++++++++++++++++++++++++++++++- youtube_dl/extractor/yahoo.py | 10 ++++++---- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5d0d2a9bc..b113aaec6 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -310,6 +310,7 @@ from .naver import NaverIE from .nba import NBAIE from .nbc import ( NBCIE, + NBCSportsVPlayerIE, NBCSportsIE, NBCNewsIE, ) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 033bf71f0..c8dd72ab4 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -50,7 +50,7 @@ class NBCIE(InfoExtractor): return self.url_result(theplatform_url) -class 
NBCSportsIE(InfoExtractor): +class NBCSportsVPlayerIE(InfoExtractor): _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)' _TEST = { @@ -64,6 +64,13 @@ class NBCSportsIE(InfoExtractor): } } + @staticmethod + def _extract_url(webpage): + iframe_m = re.search( + r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage) + if iframe_m: + return iframe_m.group('url') + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -71,6 +78,28 @@ class NBCSportsIE(InfoExtractor): return self.url_result(theplatform_url, 'ThePlatform') +class NBCSportsIE(InfoExtractor): + # Does not include https because its certificate is invalid + _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)' + + _TEST = { + 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke', + 'md5': 'ba6c93f96b67bf05344f78bd523dac0f', + 'info_dict': { + 'id': 'PHJSaFWbrTY9', + 'ext': 'flv', + 'title': 'Tom Izzo, Michigan St. 
has \'so much respect\' for Duke', + 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + return self.url_result( + NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer') + + class NBCNewsIE(InfoExtractor): _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/ (?:video/.+?/(?P<id>\d+)| diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 6e72f1e55..43776d1e6 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -17,6 +17,8 @@ from ..utils import ( int_or_none, ) +from .nbc import NBCSportsVPlayerIE + class YahooIE(InfoExtractor): IE_DESC = 'Yahoo screen and movies' @@ -132,6 +134,7 @@ class YahooIE(InfoExtractor): }, { 'note': 'NBC Sports embeds', 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313', + 'md5': 'ceae8dced5c14a1c1ffcb7a32194cca5', 'info_dict': { 'id': '9CsDKds0kvHI', 'ext': 'flv', @@ -161,10 +164,9 @@ class YahooIE(InfoExtractor): video_id = items[0]['id'] return self._get_info(video_id, display_id, webpage) # Look for NBCSports iframes - iframe_m = re.search( - r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage) - if iframe_m: - return self.url_result(iframe_m.group('url'), 'NBCSports') + nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage) + if nbc_sports_url: + return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') items_json = self._search_regex( r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, From 1d31e7a2fc2fb78c792754578a8a58b056811b84 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 31 Mar 2015 02:51:11 +0800 Subject: [PATCH 3/6] [NBCSports] Move imports alphabetically --- youtube_dl/extractor/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b113aaec6..9fddb8e32 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -310,9 +310,9 @@ from .naver import NaverIE from .nba import NBAIE from .nbc import ( NBCIE, - NBCSportsVPlayerIE, - NBCSportsIE, NBCNewsIE, + NBCSportsIE, + NBCSportsVPlayerIE, ) from .ndr import NDRIE from .ndtv import NDTVIE From a2edf2e7ff314eaa3124c1da1b962d054b6d9fff Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 31 Mar 2015 03:36:09 +0800 Subject: [PATCH 4/6] [NBC/ThePlatform/Generic] Add a generic detector for NBCSportsVPlayer and enhance error detection in ThePlatformIE --- youtube_dl/extractor/generic.py | 15 +++++++++++++++ youtube_dl/extractor/nbc.py | 2 +- youtube_dl/extractor/theplatform.py | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 042d23a13..9ddf36f6b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -29,6 +29,7 @@ from ..utils import ( xpath_text, ) from .brightcove import BrightcoveIE +from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE from .smotri import SmotriIE @@ -639,6 +640,15 @@ class GenericIE(InfoExtractor): 'upload_date': '20150228', 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', } + }, + # NBC Sports vplayer embeds + { + 'url': 'http://bbs.clutchfans.net/showthread.php?t=244180', + 'info_dict': { + 'id': '_hqLjQ95yx8Z', + 'ext': 'flv' + }, + 'skip': 'This content expired on 9/17/14 12:23 PM', } ] @@ -1252,6 +1262,11 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin') + # Look for NBC Sports VPlayer embeds + nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage) + if nbc_sports_url: + return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') + def check_video(vurl): if YoutubeIE.suitable(vurl): return 
True diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index c8dd72ab4..be9969d12 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -51,7 +51,7 @@ class NBCIE(InfoExtractor): class NBCSportsVPlayerIE(InfoExtractor): - _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)' + _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)' _TEST = { 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI', diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index feac666f7..0e3e627f4 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -92,7 +92,7 @@ class ThePlatformIE(InfoExtractor): error_msg = next( n.attrib['abstract'] for n in meta.findall(_x('.//smil:ref')) - if n.attrib.get('title') == 'Geographic Restriction') + if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired') except StopIteration: pass else: From 5cbb2699ee04535449e37a07dd9cac9bfd224fe3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 31 Mar 2015 03:38:45 +0800 Subject: [PATCH 5/6] [NBCSports] Add a test case for extended _VALID_URL --- youtube_dl/extractor/nbc.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index be9969d12..395f53df3 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -53,7 +53,7 @@ class NBCIE(InfoExtractor): class NBCSportsVPlayerIE(InfoExtractor): _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)' - _TEST = { + _TESTS = [{ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI', 'md5': 'ceae8dced5c14a1c1ffcb7a32194cca5', 'info_dict': { @@ -62,7 +62,11 @@ class NBCSportsVPlayerIE(InfoExtractor): 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', 'title': 'Tyler Kalinoski hits 
buzzer-beater to lift Davidson', } - } + }, { + 'note': 'This video is already expired. It\'s for testing _VALID_URL', + 'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z', + 'only_matching': True, + }] @staticmethod def _extract_url(webpage): From e15307a612ea588b504f1f03ba0201612df66b35 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 31 Mar 2015 13:13:29 +0800 Subject: [PATCH 6/6] [NBCSports/Yahoo] Comment out some MD5 checksums They seem to change constantly --- youtube_dl/extractor/nbc.py | 2 -- youtube_dl/extractor/yahoo.py | 1 - 2 files changed, 3 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 395f53df3..b7f6a5366 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -55,7 +55,6 @@ class NBCSportsVPlayerIE(InfoExtractor): _TESTS = [{ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI', - 'md5': 'ceae8dced5c14a1c1ffcb7a32194cca5', 'info_dict': { 'id': '9CsDKds0kvHI', 'ext': 'flv', @@ -88,7 +87,6 @@ class NBCSportsIE(InfoExtractor): _TEST = { 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke', - 'md5': 'ba6c93f96b67bf05344f78bd523dac0f', 'info_dict': { 'id': 'PHJSaFWbrTY9', 'ext': 'flv', diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 43776d1e6..b777159c5 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -134,7 +134,6 @@ class YahooIE(InfoExtractor): }, { 'note': 'NBC Sports embeds', 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313', - 'md5': 'ceae8dced5c14a1c1ffcb7a32194cca5', 'info_dict': { 'id': '9CsDKds0kvHI', 'ext': 'flv',