From 60cc4dc4b49c6ebd4a86a4d7f998133474662eee Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 11 Mar 2014 16:51:36 +0100 Subject: [PATCH] [generic/funnyordie] Add support for funnyordie embeds (Fixes #2546) --- youtube_dl/extractor/funnyordie.py | 17 ++++++++++++++--- youtube_dl/extractor/generic.py | 19 +++++++++++++++++++ 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 7c40e6753..5522e4954 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -1,12 +1,13 @@ from __future__ import unicode_literals +import json import re from .common import InfoExtractor class FunnyOrDieIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$' + _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])' _TEST = { 'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version', 'file': '0732f586d7.mp4', @@ -30,10 +31,20 @@ class FunnyOrDieIE(InfoExtractor): [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''], webpage, 'video URL', flags=re.DOTALL) + if mobj.group('type') == 'embed': + post_json = self._search_regex( + r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details') + post = json.loads(post_json)['attachment'] + title = post['name'] + description = post.get('description') + else: + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + return { 'id': video_id, 'url': video_url, 'ext': 'mp4', - 'title': self._og_search_title(webpage), - 'description': self._og_search_description(webpage), + 'title': title, + 'description': description, } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7666cf207..6e6324779 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -134,6 +134,17 @@ class 
GenericIE(InfoExtractor): 'skip_download': True, }, }, + # funnyordie embed + { + 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns', + 'md5': '7cf780be104d40fea7bae52eed4a470e', + 'info_dict': { + 'id': '18e820ec3f', + 'ext': 'mp4', + 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama', + 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.', + } + }, ] def report_download_webpage(self, video_id): @@ -432,6 +443,14 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(compat_urllib_parse.unquote(mobj.group('url'))) + # Look for funnyordie embed + matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage) + if matches: + urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie') + for eurl in matches] + return self.playlist_result( + urlrs, playlist_id=video_id, playlist_title=video_title) + # Start with something easy: JW Player in SWFObject mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if mobj is None: