youtube-dl/youtube_dl/extractor/generic.py

# encoding: utf-8

from __future__ import unicode_literals

import os
import re

from .common import InfoExtractor
from .youtube import YoutubeIE
from ..compat import (
    compat_urllib_parse_unquote,
    compat_urllib_request,
    compat_urlparse,
    compat_xml_parse_error,
)
from ..utils import (
    determine_ext,
    ExtractorError,
    float_or_none,
    HEADRequest,
    is_html,
    orderedSet,
    parse_xml,
    smuggle_url,
    unescapeHTML,
    unified_strdate,
    unsmuggle_url,
    UnsupportedError,
    url_basename,
    xpath_text,
)
from .brightcove import BrightcoveIE
from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
from .tvc import TVCIE
from .sportbox import SportBoxEmbedIE
from .smotri import SmotriIE
from .myvi import MyviIE
from .condenast import CondeNastIE
from .udn import UDNEmbedIE
from .senateisvp import SenateISVPIE
from .bliptv import BlipTVIE
from .svt import SVTIE
from .pornhub import PornHubIE
from .xhamster import XHamsterEmbedIE
from .vimeo import VimeoIE
from .dailymotion import DailymotionCloudIE
from .onionstudios import OnionStudiosIE
from .snagfilms import SnagFilmsEmbedIE
from .googledrive import GoogleDriveEmbedIE


class GenericIE(InfoExtractor):
    IE_DESC = 'Generic downloader that works on some sites'
    _VALID_URL = r'.*'
    IE_NAME = 'generic'
    _TESTS = [
        # Direct link to a video
        {
            'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
            'md5': '67d406c2bcb6af27fa886f31aa934bbe',
            'info_dict': {
                'id': 'trailer',
                'ext': 'mp4',
                'title': 'trailer',
                'upload_date': '20100513',
            }
        },
        # Direct link to media delivered compressed (until Accept-Encoding is *)
        {
            'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
            'md5': '128c42e68b13950268b648275386fc74',
            'info_dict': {
                'id': 'FictionJunction-Parallel_Hearts',
                'ext': 'flac',
                'title': 'FictionJunction-Parallel_Hearts',
                'upload_date': '20140522',
            },
            'expected_warnings': [
                'URL could be a direct video link, returning it as such.'
            ]
        },
        # Direct download with broken HEAD
        {
            'url': 'http://ai-radio.org:8000/radio.opus',
            'info_dict': {
                'id': 'radio',
                'ext': 'opus',
                'title': 'radio',
            },
            'params': {
                'skip_download': True,  # infinite live stream
            },
            'expected_warnings': [
                r'501.*Not Implemented'
            ],
        },
        # Direct link with incorrect MIME type
        {
            'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
            'md5': '4ccbebe5f36706d85221f204d7eb5913',
            'info_dict': {
                'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
                'id': '5_Lennart_Poettering_-_Systemd',
                'ext': 'webm',
                'title': '5_Lennart_Poettering_-_Systemd',
                'upload_date': '20141120',
            },
            'expected_warnings': [
                'URL could be a direct video link, returning it as such.'
            ]
        },
        # RSS feed
        {
            'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
            'info_dict': {
                'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
                'title': 'Zero Punctuation',
                'description': 're:.*groundbreaking video review series.*'
            },
            'playlist_mincount': 11,
        },
        # RSS feed with enclosure
        {
            'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
            'info_dict': {
                'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
                'ext': 'm4v',
                'upload_date': '20150228',
                'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
            }
        },
        # google redirect
        {
            'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
            'info_dict': {
                'id': 'cmQHVoWB5FY',
                'ext': 'mp4',
                'upload_date': '20130224',
                'uploader_id': 'TheVerge',
                'description': 're:^Chris Ziegler takes a look at the\.*',
                'uploader': 'The Verge',
                'title': 'First Firefox OS phones side-by-side',
            },
            'params': {
                'skip_download': False,
            }
        },
        {
            'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
            'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
            'info_dict': {
                'id': '13601338388002',
                'ext': 'mp4',
                'uploader': 'www.hodiho.fr',
                'title': 'R\u00e9gis plante sa Jeep',
            }
        },
        # bandcamp page with custom domain
        {
            'add_ie': ['Bandcamp'],
            'url': 'http://bronyrock.com/track/the-pony-mash',
            'info_dict': {
                'id': '3235767654',
                'ext': 'mp3',
                'title': 'The Pony Mash',
                'uploader': 'M_Pallante',
            },
            'skip': 'There is a limit of 200 free downloads / month for the test song',
        },
        # embedded brightcove video
        # it also tests brightcove videos that need to set the 'Referer' in the
        # http requests
        {
            'add_ie': ['Brightcove'],
            'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
            'info_dict': {
                'id': '2765128793001',
                'ext': 'mp4',
                'title': 'Le cours de bourse : l’analyse technique',
                'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
                'uploader': 'BFM BUSINESS',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # https://github.com/rg3/youtube-dl/issues/2253
            'url': 'http://bcove.me/i6nfkrc3',
            'md5': '0ba9446db037002366bab3b3eb30c88c',
            'info_dict': {
                'id': '3101154703001',
                'ext': 'mp4',
                'title': 'Still no power',
                'uploader': 'thestar.com',
                'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
            },
            'add_ie': ['Brightcove'],
        },
        {
            'url': 'http://www.championat.com/video/football/v/87/87499.html',
            'md5': 'fb973ecf6e4a78a67453647444222983',
            'info_dict': {
                'id': '3414141473001',
                'ext': 'mp4',
                'title': 'Видео. Удаление Дзагоева (ЦСКА)',
                'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
                'uploader': 'Championat',
            },
        },
        {
            # https://github.com/rg3/youtube-dl/issues/3541
            'add_ie': ['Brightcove'],
            'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
            'info_dict': {
                'id': '3866516442001',
                'ext': 'mp4',
                'title': 'Leer mij vrouwen kennen: Aflevering 1',
                'description': 'Leer mij vrouwen kennen: Aflevering 1',
                'uploader': 'SBS Broadcasting',
            },
            'skip': 'Restricted to Netherlands',
            'params': {
                'skip_download': True,  # m3u8 download
            },
        },
        # ooyala video
        {
            'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
            'md5': '166dd577b433b4d4ebfee10b0824d8ff',
            'info_dict': {
                'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
                'ext': 'mp4',
                'title': '2cc213299525360.mov',  # that's what we get
            },
            'add_ie': ['Ooyala'],
        },
        # multiple ooyala embeds on SBN network websites
        {
            'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
            'info_dict': {
                'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
                'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
            },
            'playlist_mincount': 3,
            'params': {
                'skip_download': True,
            },
            'add_ie': ['Ooyala'],
        },
        # embed.ly video
        {
            'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
            'info_dict': {
                'id': '9ODmcdjQcHQ',
                'ext': 'mp4',
                'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
                'upload_date': '20140225',
                'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
                'uploader': 'Tested',
                'uploader_id': 'testedcom',
            },
            # No need to test YoutubeIE here
            'params': {
                'skip_download': True,
            },
        },
        # funnyordie embed
        {
            'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
            'info_dict': {
                'id': '18e820ec3f',
                'ext': 'mp4',
                'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
                'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
            },
        },
        # BBC iPlayer embeds
        {
            'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
            'info_dict': {
                'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
            },
            'playlist_mincount': 18,
        },
        # RUTV embed
        {
            'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
            'info_dict': {
                'id': '776940',
                'ext': 'mp4',
                'title': 'Охотское море стало целиком российским',
                'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        # TVC embed
        {
            'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
            'info_dict': {
                'id': '55304',
                'ext': 'mp4',
                'title': 'Дошкольное воспитание',
            },
        },
        # SportBox embed
        {
            'url': 'http://www.vestifinance.ru/articles/25753',
            'info_dict': {
                'id': '25753',
                'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
            },
            'playlist': [{
                'info_dict': {
                    'id': '370908',
                    'title': 'Госзаказ. День 3',
                    'ext': 'mp4',
                }
            }, {
                'info_dict': {
                    'id': '370905',
                    'title': 'Госзаказ. День 2',
                    'ext': 'mp4',
                }
            }, {
                'info_dict': {
                    'id': '370902',
                    'title': 'Госзаказ. День 1',
                    'ext': 'mp4',
                }
            }],
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        # Myvi.ru embed
        {
            'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
            'info_dict': {
                'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
                'ext': 'mp4',
                'title': 'Ужастики, русский трейлер (2015)',
                'thumbnail': 're:^https?://.*\.jpg$',
                'duration': 153,
            }
        },
        # XHamster embed
        {
            'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
            'info_dict': {
                'id': 'showthread',
                'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
            },
            'playlist_mincount': 7,
        },
        # Embedded TED video
        {
            'url': 'http://en.support.wordpress.com/videos/ted-talks/',
            'md5': '65fdff94098e4a607385a60c5177c638',
            'info_dict': {
                'id': '1969',
                'ext': 'mp4',
                'title': 'Hidden miracles of the natural world',
                'uploader': 'Louie Schwartzberg',
                'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
            }
        },
        # Embedded Ustream video
        {
            'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
            'md5': '27b99cdb639c9b12a79bca876a073417',
            'info_dict': {
                'id': '45734260',
                'ext': 'flv',
                'uploader': 'AU SPA:  The NSA and Privacy',
                'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
            }
        },
        # nowvideo embed hidden behind percent encoding
        {
            'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
            'md5': '2baf4ddd70f697d94b1c18cf796d5107',
            'info_dict': {
                'id': '06e53103ca9aa',
                'ext': 'flv',
                'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
                'description': 'No description',
            },
        },
        # arte embed
        {
            'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
            'md5': '7653032cbb25bf6c80d80f217055fa43',
            'info_dict': {
                'id': '048195-004_PLUS7-F',
                'ext': 'flv',
                'title': 'X:enius',
                'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
                'upload_date': '20140320',
            },
            'params': {
                'skip_download': 'Requires rtmpdump'
            }
        },
        # francetv embed
        {
            'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
            'info_dict': {
                'id': 'EV_30231',
                'ext': 'mp4',
                'title': 'Alcaline, le concert avec Calogero',
                'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
                'upload_date': '20150226',
                'timestamp': 1424989860,
                'duration': 5400,
            },
            'params': {
                # m3u8 downloads
                'skip_download': True,
            },
            'expected_warnings': [
                'Forbidden'
            ]
        },
        # Condé Nast embed
        {
            'url': 'http://www.wired.com/2014/04/honda-asimo/',
            'md5': 'ba0dfe966fa007657bd1443ee672db0f',
            'info_dict': {
                'id': '53501be369702d3275860000',
                'ext': 'mp4',
                'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
            }
        },
        # Dailymotion embed
        {
            'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
            'md5': '441aeeb82eb72c422c7f14ec533999cd',
            'info_dict': {
                'id': 'k2mm4bCdJ6CQ2i7c8o2',
                'ext': 'mp4',
                'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
                'uploader': 'Spi0n',
            },
            'add_ie': ['Dailymotion'],
        },
        # YouTube embed
        {
            'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
            'info_dict': {
                'id': 'FXRb4ykk4S0',
                'ext': 'mp4',
                'title': 'The NBL Auction 2014',
                'uploader': 'BADMINTON England',
                'uploader_id': 'BADMINTONEvents',
                'upload_date': '20140603',
                'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
            },
            'add_ie': ['Youtube'],
            'params': {
                'skip_download': True,
            }
        },
        # MTVServices embed
        {
            'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
            'md5': '35727f82f58c76d996fc188f9755b0d5',
            'info_dict': {
                'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
                'ext': 'mp4',
                'title': 'Review',
                'description': 'Mario\'s life in the fast lane has never looked so good.',
            },
        },
        # YouTube embed via <data-embed-url="">
        {
            'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
            'info_dict': {
                'id': '4vAffPZIT44',
                'ext': 'mp4',
                'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
                'uploader': 'Gameloft',
                'uploader_id': 'gameloft',
                'upload_date': '20140828',
                'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
            },
            'params': {
                'skip_download': True,
            }
        },
        # Camtasia studio
        {
            'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
            'playlist': [{
                'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
                'info_dict': {
                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
                    'ext': 'flv',
                    'duration': 2235.90,
                }
            }, {
                'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
                'info_dict': {
                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
                    'ext': 'flv',
                    'duration': 2235.93,
                }
            }],
            'info_dict': {
                'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
            }
        },
        # Flowplayer
        {
            'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
            'md5': '9d65602bf31c6e20014319c7d07fba27',
            'info_dict': {
                'id': '5123ea6d5e5a7',
                'ext': 'mp4',
                'age_limit': 18,
                'uploader': 'www.handjobhub.com',
                'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
            }
        },
        # Multiple brightcove videos
        # https://github.com/rg3/youtube-dl/issues/2283
        {
            'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
            'info_dict': {
                'id': 'always-never',
                'title': 'Always / Never - The New Yorker',
            },
            'playlist_count': 3,
            'params': {
                'extract_flat': False,
                'skip_download': True,
            }
        },
        # MLB embed
        {
            'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
            'md5': '96f09a37e44da40dd083e12d9a683327',
            'info_dict': {
                'id': '33322633',
                'ext': 'mp4',
                'title': 'Ump changes call to ball',
                'description': 'md5:71c11215384298a172a6dcb4c2e20685',
                'duration': 48,
                'timestamp': 1401537900,
                'upload_date': '20140531',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
        },
        # Wistia embed
        {
            'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
            'md5': '8788b683c777a5cf25621eaf286d0c23',
            'info_dict': {
                'id': '1cfaf6b7ea',
                'ext': 'mov',
                'title': 'md5:51364a8d3d009997ba99656004b5e20d',
                'duration': 643.0,
                'filesize': 182808282,
                'uploader': 'education-portal.com',
            },
        },
        {
            'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
            'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
            'info_dict': {
                'id': 'uxjb0lwrcz',
                'ext': 'mp4',
                'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
                'duration': 1715.0,
                'uploader': 'thoughtworks.wistia.com',
            },
        },
        # Soundcloud embed
        {
            'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
            'info_dict': {
                'id': '174391317',
                'ext': 'mp3',
                'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
                'uploader': 'Sophos Security',
                'title': 'Chet Chat 171 - Oct 29, 2014',
                'upload_date': '20141029',
            }
        },
        # Livestream embed
        {
            'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
            'info_dict': {
                'id': '67864563',
                'ext': 'flv',
                'upload_date': '20141112',
                'title': 'Rosetta #CometLanding webcast HL 10',
            }
        },
        # LazyYT
        {
            'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
            'info_dict': {
                'id': '1986',
                'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
            },
            'playlist_mincount': 2,
        },
        # Cinchcast embed
        {
            'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
            'info_dict': {
                'id': '7141703',
                'ext': 'mp3',
                'upload_date': '20141126',
                'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
            }
        },
        # Cinerama player
        {
            'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
            'info_dict': {
                'id': '730m_DandD_1901_512k',
                'ext': 'mp4',
                'uploader': 'www.abc.net.au',
                'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
            }
        },
        # embedded viddler video
        {
            'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
            'info_dict': {
                'id': '4d03aad9',
                'ext': 'mp4',
                'uploader': 'deadspin',
                'title': 'WALL-TO-GORTAT',
                'timestamp': 1422285291,
                'upload_date': '20150126',
            },
            'add_ie': ['Viddler'],
        },
        # Libsyn embed
        {
            'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
            'info_dict': {
                'id': '3377616',
                'ext': 'mp3',
                'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
                'description': 'md5:601cb790edd05908957dae8aaa866465',
                'upload_date': '20150220',
            },
        },
        # jwplayer YouTube
        {
            'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
            'info_dict': {
                'id': 'Mrj4DVp2zeA',
                'ext': 'mp4',
                'upload_date': '20150212',
                'uploader': 'The National Archives UK',
                'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
                'uploader_id': 'NationalArchives08',
                'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
            },
        },
        # rtl.nl embed
        {
            'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'aanslagen-kopenhagen',
                'title': 'Aanslagen Kopenhagen | RTL Nieuws',
            }
        },
        # Zapiks embed
        {
            'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
            'info_dict': {
                'id': '118046',
                'ext': 'mp4',
                'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
            }
        },
        # Kaltura embed
        {
            'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
            'info_dict': {
                'id': '1_eergr3h1',
                'ext': 'mp4',
                'upload_date': '20150226',
                'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
                'timestamp': int,
                'title': 'John Carlson Postgame 2/25/15',
            },
        },
        # Kaltura embed (different embed code)
        {
            'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
            'info_dict': {
                'id': '1_a52wc67y',
                'ext': 'flv',
                'upload_date': '20150127',
                'uploader_id': 'PremierMedia',
                'timestamp': int,
                'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
            },
        },
        # Eagle.Platform embed (generic URL)
        {
            'url': 'http://lenta.ru/news/2015/03/06/navalny/',
            'info_dict': {
                'id': '227304',
                'ext': 'mp4',
                'title': 'Навальный вышел на свободу',
                'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
                'thumbnail': 're:^https?://.*\.jpg$',
                'duration': 87,
                'view_count': int,
                'age_limit': 0,
            },
        },
        # ClipYou (Eagle.Platform) embed (custom URL)
        {
            'url': 'http://muz-tv.ru/play/7129/',
            'info_dict': {
                'id': '12820',
                'ext': 'mp4',
                'title': "'O Sole Mio",
                'thumbnail': 're:^https?://.*\.jpg$',
                'duration': 216,
                'view_count': int,
            },
        },
        # Pladform embed
        {
            'url': 'http://muz-tv.ru/kinozal/view/7400/',
            'info_dict': {
                'id': '100183293',
                'ext': 'mp4',
                'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
                'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
                'thumbnail': 're:^https?://.*\.jpg$',
                'duration': 694,
                'age_limit': 0,
            },
        },
        # Playwire embed
        {
            'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
            'info_dict': {
                'id': '3519514',
                'ext': 'mp4',
                'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
                'thumbnail': 're:^https?://.*\.png$',
                'duration': 45.115,
            },
        },
        # 5min embed
        {
            'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
            'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
            'info_dict': {
                'id': '518726732',
                'ext': 'mp4',
                'title': 'Facebook Creates "On This Day" | Crunch Report',
            },
        },
        # SVT embed
        {
            'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
            'info_dict': {
                'id': '2900353',
                'ext': 'flv',
                'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
                'duration': 27,
                'age_limit': 0,
            },
        },
        # Crooks and Liars embed
        {
            'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
            'info_dict': {
                'id': '8RUoRhRi',
                'ext': 'mp4',
                'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
                'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
                'timestamp': 1428207000,
                'upload_date': '20150405',
                'uploader': 'Heather',
            },
        },
        # Crooks and Liars external embed
        {
            'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
            'info_dict': {
                'id': 'MTE3MjUtMzQ2MzA',
                'ext': 'mp4',
                'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
                'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
                'timestamp': 1265032391,
                'upload_date': '20100201',
                'uploader': 'Heather',
            },
        },
        # NBC Sports vplayer embed
        {
            'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
            'info_dict': {
                'id': 'ln7x1qSThw4k',
                'ext': 'flv',
                'title': "PFT Live: New leader in the 'new-look' defense",
                'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
            },
        },
        # UDN embed
        {
            'url': 'http://www.udn.com/news/story/7314/822787',
            'md5': 'fd2060e988c326991037b9aff9df21a6',
            'info_dict': {
                'id': '300346',
                'ext': 'mp4',
                'title': '中一中男師變性 全校師生力挺',
                'thumbnail': 're:^https?://.*\.jpg$',
            }
        },
        # Ooyala embed
        {
            'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
            'info_dict': {
                'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
                'ext': 'mp4',
                'description': 'VIDEO: Index/Match versus VLOOKUP.',
                'title': 'This is what separates the Excel masters from the wannabes',
            },
            'params': {
                # m3u8 downloads
                'skip_download': True,
            }
        },
        # Contains a SMIL manifest
        {
            'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
            'info_dict': {
                'id': 'file',
                'ext': 'flv',
                'title': '+ Football: Lottery Champions League Europe',
                'uploader': 'www.telewebion.com',
            },
            'params': {
                # rtmpe downloads
                'skip_download': True,
            }
        },
        # Brightcove URL in single quotes
        {
            'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
            'md5': '4ae374f1f8b91c889c4b9203c8c752af',
            'info_dict': {
                'id': '4255764656001',
                'ext': 'mp4',
                'title': 'SN Presents: Russell Martin, World Citizen',
                'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
                'uploader': 'Rogers Sportsnet',
            },
        },
        # Dailymotion Cloud video
        {
            'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
            'md5': '49444254273501a64675a7e68c502681',
            'info_dict': {
                'id': '5585de919473990de4bee11b',
                'ext': 'mp4',
                'title': 'Le débat',
                'thumbnail': 're:^https?://.*\.jpe?g$',
            }
        },
        # OnionStudios embed
        {
            'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
            'info_dict': {
                'id': '2855',
                'ext': 'mp4',
                'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
                'thumbnail': 're:^https?://.*\.jpe?g$',
                'uploader': 'ClickHole',
                'uploader_id': 'clickhole',
            }
        },
        # SnagFilms embed
        {
            'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
            'info_dict': {
                'id': '74849a00-85a9-11e1-9660-123139220831',
                'ext': 'mp4',
                'title': '#whilewewatch',
            }
        },
        # AdobeTVVideo embed
        {
            'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
            'md5': '43662b577c018ad707a63766462b1e87',
            'info_dict': {
                'id': '2456',
                'ext': 'mp4',
                'title': 'New experience with Acrobat DC',
                'description': 'New experience with Acrobat DC',
                'duration': 248.667,
            },
        }
    ]

    def report_following_redirect(self, new_url):
        """Print a notice that a redirect to new_url is being followed."""
        notice = '[redirect] Following redirect to %s' % new_url
        self._downloader.to_screen(notice)

    def _extract_rss(self, url, video_id, doc):
        playlist_title = doc.find('./channel/title').text
        playlist_desc_el = doc.find('./channel/description')
        playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text

        entries = []
        for it in doc.findall('./channel/item'):
            next_url = xpath_text(it, 'link', fatal=False)
            if not next_url:
                enclosure_nodes = it.findall('./enclosure')
                for e in enclosure_nodes:
                    next_url = e.attrib.get('url')
                    if next_url:
                        break

            if not next_url:
                continue

            entries.append({
                '_type': 'url',
                'url': next_url,
                'title': it.find('title').text,
            })

        return {
            '_type': 'playlist',
            'id': url,
            'title': playlist_title,
            'description': playlist_desc,
            'entries': entries,
        }

    def _extract_camtasia(self, url, video_id, webpage):
        """ Returns None if no camtasia video can be found. """

        camtasia_cfg = self._search_regex(
            r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
            webpage, 'camtasia configuration file', default=None)
        if camtasia_cfg is None:
            return None

        title = self._html_search_meta('DC.title', webpage, fatal=True)

        camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
        camtasia_cfg = self._download_xml(
            camtasia_url, video_id,
            note='Downloading camtasia configuration',
            errnote='Failed to download camtasia configuration')
        fileset_node = camtasia_cfg.find('./playlist/array/fileset')

        entries = []
        for n in fileset_node.getchildren():
            url_n = n.find('./uri')
            if url_n is None:
                continue

            entries.append({
                'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
                'title': '%s - %s' % (title, n.tag),
                'url': compat_urlparse.urljoin(url, url_n.text),
                'duration': float_or_none(n.find('./duration').text),
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': title,
        }

    def _real_extract(self, url):
        if url.startswith('//'):
            return {
                '_type': 'url',
                'url': self.http_scheme() + url,
            }

        parsed_url = compat_urlparse.urlparse(url)
        if not parsed_url.scheme:
            default_search = self._downloader.params.get('default_search')
            if default_search is None:
                default_search = 'fixup_error'

            if default_search in ('auto', 'auto_warning', 'fixup_error'):
                if '/' in url:
                    self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
                    return self.url_result('http://' + url)
                elif default_search != 'fixup_error':
                    if default_search == 'auto_warning':
                        if re.match(r'^(?:url|URL)$', url):
                            raise ExtractorError(
                                'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
                                expected=True)
                        else:
                            self._downloader.report_warning(
                                'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
                    return self.url_result('ytsearch:' + url)

            if default_search in ('error', 'fixup_error'):
                raise ExtractorError(
                    '%r is not a valid URL. '
                    'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
                    % (url, url), expected=True)
            else:
                if ':' not in default_search:
                    default_search += ':'
                return self.url_result(default_search + url)

        url, smuggled_data = unsmuggle_url(url)
        force_videoid = None
        is_intentional = smuggled_data and smuggled_data.get('to_generic')
        if smuggled_data and 'force_videoid' in smuggled_data:
            force_videoid = smuggled_data['force_videoid']
            video_id = force_videoid
        else:
            video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])

        self.to_screen('%s: Requesting header' % video_id)

        head_req = HEADRequest(url)
        head_response = self._request_webpage(
            head_req, video_id,
            note=False, errnote='Could not send HEAD request to %s' % url,
            fatal=False)

        if head_response is not False:
            # Check for redirect
            new_url = head_response.geturl()
            if url != new_url:
                self.report_following_redirect(new_url)
                if force_videoid:
                    new_url = smuggle_url(
                        new_url, {'force_videoid': force_videoid})
                return self.url_result(new_url)

        full_response = None
        if head_response is False:
            request = compat_urllib_request.Request(url)
            request.add_header('Accept-Encoding', '*')
            full_response = self._request_webpage(request, video_id)
            head_response = full_response

        # Check for direct link to a video
        content_type = head_response.headers.get('Content-Type', '')
        m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
        if m:
            upload_date = unified_strdate(
                head_response.headers.get('Last-Modified'))
            return {
                'id': video_id,
                'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
                'direct': True,
                'formats': [{
                    'format_id': m.group('format_id'),
                    'url': url,
                    'vcodec': 'none' if m.group('type') == 'audio' else None
                }],
                'upload_date': upload_date,
            }

        if not self._downloader.params.get('test', False) and not is_intentional:
            force = self._downloader.params.get('force_generic_extractor', False)
            self._downloader.report_warning(
                '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))

        if not full_response:
            request = compat_urllib_request.Request(url)
            # Some webservers may serve compressed content of rather big size (e.g. gzipped flac),
            # making it impossible to download only a chunk of the file (yet we need only the first
            # 512 bytes to test whether it's HTML or not). With youtube-dl's default Accept-Encoding
            # this always results in downloading the whole file, which is not desirable.
            # Therefore for the extraction pass we have to override Accept-Encoding to any in order
            # to accept raw bytes and be able to download only a chunk.
            # It would probably be better to solve this by checking Content-Type for application/octet-stream
            # after the HEAD request finishes, but we are not sure we can rely on this.
            request.add_header('Accept-Encoding', '*')
            full_response = self._request_webpage(request, video_id)

        # Maybe it's a direct link to a video?
        # Be careful not to download the whole thing!
        first_bytes = full_response.read(512)
        if not is_html(first_bytes):
            self._downloader.report_warning(
                'URL could be a direct video link, returning it as such.')
            upload_date = unified_strdate(
                head_response.headers.get('Last-Modified'))
            return {
                'id': video_id,
                'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
                'direct': True,
                'url': url,
                'upload_date': upload_date,
            }

        webpage = self._webpage_read_content(
            full_response, url, video_id, prefix=first_bytes)

        self.report_extraction(video_id)

        # Is it an RSS feed?
        try:
            doc = parse_xml(webpage)
            if doc.tag == 'rss':
                return self._extract_rss(url, video_id, doc)
        except compat_xml_parse_error:
            pass

        # Is it a Camtasia project?
        camtasia_res = self._extract_camtasia(url, video_id, webpage)
        if camtasia_res is not None:
            return camtasia_res

        # Sometimes embedded video player is hidden behind percent encoding
        # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
        # Unescaping the whole page allows to handle those cases in a generic way
        webpage = compat_urllib_parse_unquote(webpage)

        # it's tempting to parse this further, but you would
        # have to take into account all the variations like
        #   Video Title - Site Name
        #   Site Name | Video Title
        #   Video Title - Tagline | Site Name
        # and so on and so forth; it's just not practical
        video_title = self._html_search_regex(
            r'(?s)<title>(.*?)</title>', webpage, 'video title',
            default='video')

        # Try to detect age limit automatically
        age_limit = self._rta_search(webpage)
        # And then there are the jokers who advertise that they use RTA,
        # but actually don't.
        AGE_LIMIT_MARKERS = [
            r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
        ]
        if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
            age_limit = 18

        # video uploader is domain name
        video_uploader = self._search_regex(
            r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')

        # Helper method
        def _playlist_from_matches(matches, getter=None, ie=None):
            urlrs = orderedSet(
                self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
                for m in matches)
            return self.playlist_result(
                urlrs, playlist_id=video_id, playlist_title=video_title)

        # Look for BrightCove:
        bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
        if bc_urls:
            self.to_screen('Brightcove video detected.')
            entries = [{
                '_type': 'url',
                'url': smuggle_url(bc_url, {'Referer': url}),
                'ie_key': 'Brightcove'
            } for bc_url in bc_urls]

            return {
                '_type': 'playlist',
                'title': video_title,
                'id': video_id,
                'entries': entries,
            }

        # Look for embedded rtl.nl player
        matches = re.findall(
            r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
            webpage)
        if matches:
            return _playlist_from_matches(matches, ie='RtlNl')

        vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
        if vimeo_url is not None:
            return self.url_result(vimeo_url)

        vid_me_embed_url = self._search_regex(
            r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
            webpage, 'vid.me embed', default=None)
        if vid_me_embed_url is not None:
            return self.url_result(vid_me_embed_url, 'Vidme')

        # Look for embedded YouTube player
        matches = re.findall(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                new\s+SWFObject\(
            )
            (["\'])
                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                (?:embed|v|p)/.+?)
            \1''', webpage)
        if matches:
            return _playlist_from_matches(
                matches, lambda m: unescapeHTML(m[1]))

        # Look for lazyYT YouTube embed
        matches = re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
        if matches:
            return _playlist_from_matches(matches, lambda m: unescapeHTML(m))

        # Look for embedded Dailymotion player
        matches = re.findall(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
        if matches:
            return _playlist_from_matches(
                matches, lambda m: unescapeHTML(m[1]))

        # Look for embedded Dailymotion playlist player (#3822)
        m = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
        if m:
            playlists = re.findall(
                r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
            if playlists:
                return _playlist_from_matches(
                    playlists, lambda p: '//dailymotion.com/playlist/%s' % p)

        # Look for embedded Wistia player
        match = re.search(
            r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
        if match:
            embed_url = self._proto_relative_url(
                unescapeHTML(match.group('url')))
            return {
                '_type': 'url_transparent',
                'url': embed_url,
                'ie_key': 'Wistia',
                'uploader': video_uploader,
                'title': video_title,
                'id': video_id,
            }

        match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
        if match:
            return {
                '_type': 'url_transparent',
                'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
                'ie_key': 'Wistia',
                'uploader': video_uploader,
                'title': video_title,
                'id': match.group('id')
            }

        # Look for embedded blip.tv player
        bliptv_url = BlipTVIE._extract_url(webpage)
        if bliptv_url:
            return self.url_result(bliptv_url, 'BlipTV')

        # Look for SVT player
        svt_url = SVTIE._extract_url(webpage)
        if svt_url:
            return self.url_result(svt_url, 'SVT')

        # Look for embedded condenast player
        matches = re.findall(
            r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
            webpage)
        if matches:
            return {
                '_type': 'playlist',
                'entries': [{
                    '_type': 'url',
                    'ie_key': 'CondeNast',
                    'url': ma,
                } for ma in matches],
                'title': video_title,
                'id': video_id,
            }

        # Look for Bandcamp pages with custom domain
        mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
        if mobj is not None:
            burl = unescapeHTML(mobj.group(1))
            # Don't set the extractor because it can be a track url or an album
            return self.url_result(burl)

        # Look for embedded Vevo player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for embedded Viddler player
        mobj = re.search(
            r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for NYTimes player
        mobj = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for Libsyn player
        mobj = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for Ooyala videos
        mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
                re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
                re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
        if mobj is not None:
            return OoyalaIE._build_url_result(mobj.group('ec'))

        # Look for multiple Ooyala embeds on SBN network websites
        mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
        if mobj is not None:
            embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
            if embeds:
                return _playlist_from_matches(
                    embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')

        # Look for Aparat videos
        mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
        if mobj is not None:
            return self.url_result(mobj.group(1), 'Aparat')

        # Look for MPORA videos
        mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
        if mobj is not None:
            return self.url_result(mobj.group(1), 'Mpora')

        # Look for embedded NovaMov-based player
        mobj = re.search(
            r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
                    (?P<url>http://(?:(?:embed|www)\.)?
                        (?:novamov\.com|
                           nowvideo\.(?:ch|sx|eu|at|ag|co)|
                           videoweed\.(?:es|com)|
                           movshare\.(?:net|sx|ag)|
                           divxstage\.(?:eu|net|ch|co|at|ag))
                        /embed\.php.+?)\1''', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for embedded Facebook player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Facebook')

        # Look for embedded VK player
        mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'VK')

        # Look for embedded ivi player
        mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Ivi')

        # Look for embedded Huffington Post player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'HuffPost')

        # Look for embed.ly
        mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))
        mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
        if mobj is not None:
            return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))

        # Look for funnyordie embed
        matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
        if matches:
            return _playlist_from_matches(
                matches, getter=unescapeHTML, ie='FunnyOrDie')

        # Look for BBC iPlayer embed
        matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
        if matches:
            return _playlist_from_matches(matches, ie='BBCCoUk')

        # Look for embedded RUTV player
        rutv_url = RUTVIE._extract_url(webpage)
        if rutv_url:
            return self.url_result(rutv_url, 'RUTV')

        # Look for embedded TVC player
        tvc_url = TVCIE._extract_url(webpage)
        if tvc_url:
            return self.url_result(tvc_url, 'TVC')

        # Look for embedded SportBox player
        sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
        if sportbox_urls:
            return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')

        # Look for embedded PornHub player
        pornhub_url = PornHubIE._extract_url(webpage)
        if pornhub_url:
            return self.url_result(pornhub_url, 'PornHub')

        # Look for embedded XHamster player
        xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
        if xhamster_urls:
            return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')

        # Look for embedded Tvigle player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Tvigle')

        # Look for embedded TED player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'TED')

        # Look for embedded Ustream videos
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Ustream')

        # Look for embedded arte.tv player
        mobj = re.search(
            r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'ArteTVEmbed')

        # Look for embedded francetv player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for embedded smotri.com player
        smotri_url = SmotriIE._extract_url(webpage)
        if smotri_url:
            return self.url_result(smotri_url, 'Smotri')

        # Look for embedded Myvi.ru player
        myvi_url = MyviIE._extract_url(webpage)
        if myvi_url:
            return self.url_result(myvi_url)

        # Look for embedded soundcloud player
        mobj = re.search(
            r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
            webpage)
        if mobj is not None:
            url = unescapeHTML(mobj.group('url'))
            return self.url_result(url)

        # Look for embedded vulture.com player
        mobj = re.search(
            r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
            webpage)
        if mobj is not None:
            url = unescapeHTML(mobj.group('url'))
            return self.url_result(url, ie='Vulture')

        # Look for embedded mtvservices player
        mobj = re.search(
            r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
            webpage)
        if mobj is not None:
            url = unescapeHTML(mobj.group('url'))
            return self.url_result(url, ie='MTVServicesEmbedded')

        # Look for embedded yahoo player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Yahoo')

        # Look for embedded sbs.com.au player
        mobj = re.search(
            r'''(?x)
            (?:
                <meta\s+property="og:video"\s+content=|
                <iframe[^>]+?src=
            )
            (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'SBS')

        # Look for embedded Cinchcast player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Cinchcast')

        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
            webpage)
        if not mobj:
            mobj = re.search(
                r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
                webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'MLB')

        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
            webpage)
        if mobj is not None:
            return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')

        mobj = re.search(
            r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
            webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Livestream')

        # Look for Zapiks embed
        mobj = re.search(
            r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Zapiks')

        # Look for Kaltura embeds
        mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
                re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
        if mobj is not None:
            return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')

        # Look for Eagle.Platform embeds
        mobj = re.search(
            r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'EaglePlatform')

        # Look for ClipYou (uses Eagle.Platform) embeds
        mobj = re.search(
            r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
        if mobj is not None:
            return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')

        # Look for Pladform embeds
        mobj = re.search(
            r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'Pladform')

        # Look for Playwire embeds
        mobj = re.search(
            r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for 5min embeds
        mobj = re.search(
            r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
        if mobj is not None:
            return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')

        # Look for Crooks and Liars embeds
        mobj = re.search(
            r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
        if mobj is not None:
            return self.url_result(mobj.group('url'))

        # Look for NBC Sports VPlayer embeds
        nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
        if nbc_sports_url:
            return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')

        # Look for Google Drive embeds
        google_drive_url = GoogleDriveEmbedIE._extract_url(webpage)
        if google_drive_url:
            return self.url_result(google_drive_url, 'GoogleDrive')

        # Look for UDN embeds
        mobj = re.search(
            r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
        if mobj is not None:
            return self.url_result(
                compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')

        # Look for Senate ISVP iframe
        senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
        if senate_isvp_url:
            return self.url_result(senate_isvp_url, 'SenateISVP')

        # Look for Dailymotion Cloud videos
        dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
        if dmcloud_url:
            return self.url_result(dmcloud_url, 'DailymotionCloud')

        # Look for OnionStudios embeds
        onionstudios_url = OnionStudiosIE._extract_url(webpage)
        if onionstudios_url:
            return self.url_result(onionstudios_url)

        # Look for SnagFilms embeds
        snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
        if snagfilms_url:
            return self.url_result(snagfilms_url)

        # Look for AdobeTVVideo embeds
        mobj = re.search(
            r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
            webpage)
        if mobj is not None:
            return self.url_result(
                self._proto_relative_url(unescapeHTML(mobj.group(1))),
                'AdobeTVVideo')

        def check_video(vurl):
            """Tell whether vurl looks like something worth treating as a video.

            Returns True for any URL that YoutubeIE reports as suitable.
            Otherwise the URL's path must contain a dot and its extension
            must not be one of the listed non-video extensions.
            """
            if YoutubeIE.suitable(vurl):
                return True

            # Extensions that are rejected outright (Flash player, images,
            # subtitle files).
            rejected_exts = ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')

            url_path = compat_urlparse.urlparse(vurl).path
            extension = determine_ext(url_path)
            if '.' not in url_path:
                return False
            return extension not in rejected_exts

        def filter_video(urls):
            """Return a list of the entries of urls accepted by check_video, in order."""
            return [candidate for candidate in urls if check_video(candidate)]

        # Start with something easy: JW Player in SWFObject
        found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
        if not found:
            # Look for gorilla-vid style embedding
            found = filter_video(re.findall(r'''(?sx)
                (?:
                    jw_plugins|
                    JWPlayerOptions|
                    jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
                )
                .*?
                ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
        if not found:
            # Broaden the search a little bit
            found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
        if not found:
            # Broaden the findall a little bit: JWPlayer JS loader
            found = filter_video(re.findall(
                r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
        if not found:
            # Flow player
            found = filter_video(re.findall(r'''(?xs)
                flowplayer\("[^"]+",\s*
                    \{[^}]+?\}\s*,
                    \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
                        ["']?url["']?\s*:\s*["']([^"']+)["']
            ''', webpage))
        if not found:
            # Cinerama player
            found = re.findall(
                r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
        if not found:
            # Try to find twitter cards info
            found = filter_video(re.findall(
                r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
        if not found:
            # We look for Open Graph info:
            # We have to match any number of spaces between elements, since some sites try to align them (e.g. statigr.am)
            m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
            if m_video_type is not None:
                found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
        if not found:
            # HTML5 video
            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
        if not found:
            REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
            found = re.search(
                r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
                r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
                webpage)
            if not found:
                # Look also in Refresh HTTP header
                refresh_header = head_response.headers.get('Refresh')
                if refresh_header:
                    found = re.search(REDIRECT_REGEX, refresh_header)
            if found:
                new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
                self.report_following_redirect(new_url)
                return {
                    '_type': 'url',
                    'url': new_url,
                }
        if not found:
            raise UnsupportedError(url)

        entries = []
        for video_url in found:
            video_url = compat_urlparse.urljoin(url, video_url)
            video_id = compat_urllib_parse_unquote(os.path.basename(video_url))

            # Sometimes, jwplayer extraction will result in a YouTube URL
            if YoutubeIE.suitable(video_url):
                entries.append(self.url_result(video_url, 'Youtube'))
                continue

            # here's a fun little line of code for you:
            video_id = os.path.splitext(video_id)[0]

            if determine_ext(video_url) == 'smil':
                entries.append({
                    'id': video_id,
                    'formats': self._extract_smil_formats(video_url, video_id),
                    'uploader': video_uploader,
                    'title': video_title,
                    'age_limit': age_limit,
                })
            else:
                entries.append({
                    'id': video_id,
                    'url': video_url,
                    'uploader': video_uploader,
                    'title': video_title,
                    'age_limit': age_limit,
                })

        if len(entries) == 1:
            return entries[0]
        else:
            for num, e in enumerate(entries, start=1):
                # 'url' results don't have a title
                if e.get('title') is not None:
                    e['title'] = '%s (%d)' % (e['title'], num)
            return {
                '_type': 'playlist',
                'entries': entries,
            }
-												GenericIE: Detect videos from Brightcove

Brightcove videos info is usually found in an <object class="BrightcoveExperience"></object> node, this is passed to a new method of BrightcoveIE that builds a url to extract the video.

											
										
										
											11 years ago
+								# encoding: utf-8
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											10 years ago
+								from __future__ import unicode_literals
-												Move GenericIE into its own file

											
										
										
											11 years ago
+								import os
 								import re
 								from .common import InfoExtractor
-												[youtube] Support jwplayer with YouTube URLs (Closes #2075)

											
										
										
											10 years ago
+								from .youtube import YoutubeIE
-												[util] Move compatibility functions out of util

utils is large enough without these compatibility functions.

Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py .
Everything else (i.e. youtube-dl-specific helpers) goes into utils.py .

											
										
										
											10 years ago
+								from ..compat import (
-												[extractor/generic] Use compat_urllib_parse_unquote for unquoting video_id and title from URL

											
										
										
											9 years ago
+								    compat_urllib_parse_unquote,
 								    compat_urllib_request,
-												[generic] simply use urljoin

											
										
										
											11 years ago
+								    compat_urlparse,
-												[generic] Fix on python 2.6

`ParseError` is not available, it raises `xml.parsers.expat.ExpatError`.
The webpage needs to be encoded.

											
										
										
											10 years ago
+								    compat_xml_parse_error,
-												[util] Move compatibility functions out of util

utils is large enough without these compatibility functions.

Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py .
Everything else (i.e. youtube-dl-specific helpers) goes into utils.py .

											
										
										
											10 years ago
+								)
 								from ..utils import (
-												fix up imports

											
										
										
											10 years ago
+								    determine_ext,
-												Move GenericIE into its own file

											
										
										
											11 years ago
+								    ExtractorError,
-												[generic] Add support for camtasia videos (Fixes #3574)

											
										
										
											10 years ago
+								    float_or_none,
-												[aparat] Add support (Fixes #2012)

											
										
										
											11 years ago
+								    HEADRequest,
-												[generic] Add support for BOMs (Fixes #4753)

											
										
										
											9 years ago
+								    is_html,
-												[generic] Simplify playlist support (#2948)

											
										
										
											10 years ago
+								    orderedSet,
-												[generic] Suppress warning about doctypes in RSS parser

											
										
										
											10 years ago
+								    parse_xml,
-												[generic] Support embedded vimeo videos (#1602)

											
										
										
											11 years ago
+								    smuggle_url,
 								    unescapeHTML,
-												Add support for direct links to a video (#1973)

											
										
										
											11 years ago
+								    unified_strdate,
-												[ministrygrid] Add extractor (Fixes #2900)

											
										
										
											10 years ago
+								    unsmuggle_url,
-												Add documentation about supported sites (Fixes #4503)

											
										
										
											9 years ago
+								    UnsupportedError,
-												Add support for direct links to a video (#1973)

											
										
										
											11 years ago
+								    url_basename,
-												[generic] Parse RSS enclosure URLs (Fixes #5091)

											
										
										
											9 years ago
+								    xpath_text,
-												Move GenericIE into its own file

											
										
										
											11 years ago
+								)
-												GenericIE: Detect videos from Brightcove

Brightcove videos info is usually found in an <object class="BrightcoveExperience"></object> node, this is passed to a new method of BrightcoveIE that builds a url to extract the video.

											
										
										
											11 years ago
+								from .brightcove import BrightcoveIE
-												[NBC/ThePlatform/Generic] Add a generic detector for NBCSportsVPlayer and enhance error detection in ThePlatformIE

											
										
										
											9 years ago
+								from .nbc import NBCSportsVPlayerIE
-												[generic] Detect ooyala videos (fixes #2013)

											
										
										
											11 years ago
+								from .ooyala import OoyalaIE
-												[generic] Add support for embedded rutv player

											
										
										
											10 years ago
+								from .rutv import RUTVIE
-												[tvc] Refactor extractor names

											
										
										
											9 years ago
+								from .tvc import TVCIE
-												[generic] Add support for sportbox embeds

											
										
										
											9 years ago
+								from .sportbox import SportBoxEmbedIE
-												[smotri] Modernize and add support for emdebbed videos (Closes #2585)

											
										
										
											10 years ago
+								from .smotri import SmotriIE
-												[myvi:embed] Rename to myvi

											
										
										
											9 years ago
+								from .myvi import MyviIE
-												[condenast] Add support for embedded videos (Closes #3929)

											
										
										
											10 years ago
+								from .condenast import CondeNastIE
-												[udn] Add new extractor

											
										
										
											9 years ago
+								from .udn import UDNEmbedIE
-												[CSpan] Add detection for Senate ISVP. Closes #5302

											
										
										
											9 years ago
+								from .senateisvp import SenateISVPIE
-												[Cinemassacre] Add detection for videos from blip.tv

											
										
										
											9 years ago
+								from .bliptv import BlipTVIE
-												[extractor/generic] Add support for svt embeds (Closes #5622)

											
										
										
											9 years ago
+								from .svt import SVTIE
-												[extractor/generic] Add support for pornhub embeds

											
										
										
											9 years ago
+								from .pornhub import PornHubIE
-												[generic] Add support for xhamster embeds

											
										
										
											9 years ago
+								from .xhamster import XHamsterEmbedIE
-												[vimeo/generic] Move detection logic from GenericIE to VimeoIE

											
										
										
											9 years ago
+								from .vimeo import VimeoIE
-												[dailymotion/generic] Add DailymotionCloudIE

											
										
										
											9 years ago
+								from .dailymotion import DailymotionCloudIE
-												[extractor/generic] Add support for OnionStudios embeds (Closes #5841)

											
										
										
											9 years ago
+								from .onionstudios import OnionStudiosIE
-												[extractor/generic] Add support for snagfilms embeds

											
										
										
											9 years ago
+								from .snagfilms import SnagFilmsEmbedIE
-												add google drive embeds
											
										
										
											9 years ago
+								from .googledrive import GoogleDriveEmbedIE
-												Move GenericIE into its own file

											
										
										
											11 years ago
-												[generic] Support double slash URLs (Fixes #1309)

											
										
										
											11 years ago
-												Move GenericIE into its own file

											
										
										
											11 years ago
+								class GenericIE(InfoExtractor):
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											10 years ago
+								    IE_DESC = 'Generic downloader that works on some sites'
-												Move GenericIE into its own file

											
										
										
											11 years ago
+								    _VALID_URL = r'.*'
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											10 years ago
+								    IE_NAME = 'generic'
-												GenericIE: Detect videos from Brightcove

Brightcove videos info is usually found in an <object class="BrightcoveExperience"></object> node, this is passed to a new method of BrightcoveIE that builds a url to extract the video.

											
										
										
											11 years ago
+								    _TESTS = [
-												[extractor/generic] Put all direct link tests near to each other for better navigation

											
										
										
											9 years ago
+								        # Direct link to a video
 								        {
 								            'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
 								            'md5': '67d406c2bcb6af27fa886f31aa934bbe',
 								            'info_dict': {
 								                'id': 'trailer',
 								                'ext': 'mp4',
 								                'title': 'trailer',
 								                'upload_date': '20100513',
 								            }
 								        },
-												[extractor/generic] Clarify test comment

											
										
										
											9 years ago
+								        # Direct link to media delivered compressed (until Accept-Encoding is *)
-												[extractor/generic] Put all direct link tests near to each other for better navigation

											
										
										
											9 years ago
+								        {
 								            'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
 								            'md5': '128c42e68b13950268b648275386fc74',
 								            'info_dict': {
 								                'id': 'FictionJunction-Parallel_Hearts',
 								                'ext': 'flac',
 								                'title': 'FictionJunction-Parallel_Hearts',
 								                'upload_date': '20140522',
 								            },
 								            'expected_warnings': [
 								                'URL could be a direct video link, returning it as such.'
 								            ]
 								        },
 								        # Direct download with broken HEAD
 								        {
 								            'url': 'http://ai-radio.org:8000/radio.opus',
 								            'info_dict': {
 								                'id': 'radio',
 								                'ext': 'opus',
 								                'title': 'radio',
 								            },
 								            'params': {
 								                'skip_download': True,  # infinite live stream
 								            },
 								            'expected_warnings': [
 								                r'501.*Not Implemented'
 								            ],
 								        },
 								        # Direct link with incorrect MIME type
 								        {
 								            'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 								            'md5': '4ccbebe5f36706d85221f204d7eb5913',
 								            'info_dict': {
 								                'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
 								                'id': '5_Lennart_Poettering_-_Systemd',
 								                'ext': 'webm',
 								                'title': '5_Lennart_Poettering_-_Systemd',
 								                'upload_date': '20141120',
 								            },
 								            'expected_warnings': [
 								                'URL could be a direct video link, returning it as such.'
 								            ]
 								        },
 								        # RSS feed
 								        {
 								            'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 								            'info_dict': {
 								                'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
 								                'title': 'Zero Punctuation',
 								                'description': 're:.*groundbreaking video review series.*'
 								            },
 								            'playlist_mincount': 11,
 								        },
 								        # RSS feed with enclosure
 								        {
 								            'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
 								            'info_dict': {
 								                'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 								                'ext': 'm4v',
 								                'upload_date': '20150228',
 								                'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
 								            }
 								        },
 								        # google redirect
 								        {
 								            'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
 								            'info_dict': {
 								                'id': 'cmQHVoWB5FY',
 								                'ext': 'mp4',
 								                'upload_date': '20130224',
 								                'uploader_id': 'TheVerge',
 								                'description': 're:^Chris Ziegler takes a look at the\.*',
 								                'uploader': 'The Verge',
 								                'title': 'First Firefox OS phones side-by-side',
 								            },
 								            'params': {
 								                'skip_download': False,
 								            }
 								        },
-												GenericIE: Detect videos from Brightcove

Brightcove videos info is usually found in an <object class="BrightcoveExperience"></object> node, this is passed to a new method of BrightcoveIE that builds a url to extract the video.

											
										
										
											11 years ago
+								        {
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											10 years ago
+								            'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
-												[generic] Update test

											
										
										
											10 years ago
+								            'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											10 years ago
+								            'info_dict': {
-												[generic] Update test

											
										
										
											10 years ago
+								                'id': '13601338388002',
 								                'ext': 'mp4',
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											10 years ago
+								                'uploader': 'www.hodiho.fr',
 								                'title': 'R\u00e9gis plante sa Jeep',
-												GenericIE: Detect videos from Brightcove

Brightcove videos info is usually found in an <object class="BrightcoveExperience"></object> node, this is passed to a new method of BrightcoveIE that builds a url to extract the video.

											
										
										
											11 years ago
+								            }
 								        },
-												[generic] Detect bandcamp pages that use custom domains (closes #1662)

They embed the original url in the 'og:url' property.

											
										
										
											11 years ago
+								        # bandcamp page with custom domain
 								        {
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											10 years ago
+								            'add_ie': ['Bandcamp'],
 								            'url': 'http://bronyrock.com/track/the-pony-mash',
 								            'info_dict': {
-												[generic] Modernize tests

											
										
										
											10 years ago
+								                'id': '3235767654',
 								                'ext': 'mp3',
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											10 years ago
+								                'title': 'The Pony Mash',
 								                'uploader': 'M_Pallante',
-												[generic] Detect bandcamp pages that use custom domains (closes #1662)

They embed the original url in the 'og:url' property.

											
										
										
											11 years ago
+								            },
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											10 years ago
+								            'skip': 'There is a limit of 200 free downloads / month for the test song',
-												[generic] Detect bandcamp pages that use custom domains (closes #1662)

They embed the original url in the 'og:url' property.

											
										
										
											11 years ago
+								        },
-												[brightcove] Add the extraction of the url from generic

											
										
										
											11 years ago
+								        # embedded brightcove video
-												[brightcove] Set the 'Referer' header if the url has the 'linkBaseUrl' parameter (fixes #1553)

											
										
										
											11 years ago
+								        # it also tests brightcove videos that need to set the 'Referer' in the
 								        # http requests
-												[brightcove] Add the extraction of the url from generic

											
										
										
											11 years ago
+								        {
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											10 years ago
+								            'add_ie': ['Brightcove'],
 								            'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
 								            'info_dict': {
 								                'id': '2765128793001',
 								                'ext': 'mp4',
 								                'title': 'Le cours de bourse : l’analyse technique',
 								                'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
 								                'uploader': 'BFM BUSINESS',
-												[brightcove] Add the extraction of the url from generic

											
										
										
											11 years ago
+								            },
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											10 years ago
+								            'params': {
 								                'skip_download': True,
-												[brightcove] Add the extraction of the url from generic

											
										
										
											11 years ago
+								            },
 								        },
-												[brightcove] Move test to generic

											
										
										
											10 years ago
+								        {
 								            # https://github.com/rg3/youtube-dl/issues/2253
 								            'url': 'http://bcove.me/i6nfkrc3',
 								            'md5': '0ba9446db037002366bab3b3eb30c88c',
 								            'info_dict': {
-												[generic] Modernize tests

											
										
										
											10 years ago
+								                'id': '3101154703001',
 								                'ext': 'mp4',
-												[brightcove] Move test to generic

											
										
										
											10 years ago
+								                'title': 'Still no power',
 								                'uploader': 'thestar.com',
 								                'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
 								            },
 								            'add_ie': ['Brightcove'],
 								        },
-												[brightcove] Encode object_str with utf-8

											
										
										
											10 years ago
+								        {
 								            'url': 'http://www.championat.com/video/football/v/87/87499.html',
 								            'md5': 'fb973ecf6e4a78a67453647444222983',
 								            'info_dict': {
 								                'id': '3414141473001',
 								                'ext': 'mp4',
 								                'title': 'Видео. Удаление Дзагоева (ЦСКА)',
 								                'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
 								                'uploader': 'Championat',
 								            },
 								        },
-												[generic/brightcove] Add a new test case for kijk.nl (#3541)

											
										
										
											10 years ago
+								        {
-												[brightcove] Extract m3u8 formats (#3541)

											
										
										
											10 years ago
+								            # https://github.com/rg3/youtube-dl/issues/3541
-												[generic/brightcove] Add a new test case for kijk.nl (#3541)

											
										
										
											10 years ago
+								            'add_ie': ['Brightcove'],
 								            'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
 								            'info_dict': {
 								                'id': '3866516442001',
-												[brightcove] Extract m3u8 formats (#3541)

											
										
										
											10 years ago
+								                'ext': 'mp4',
-												[generic/brightcove] Add a new test case for kijk.nl (#3541)

											
										
										
											10 years ago
+								                'title': 'Leer mij vrouwen kennen: Aflevering 1',
 								                'description': 'Leer mij vrouwen kennen: Aflevering 1',
 								                'uploader': 'SBS Broadcasting',
 								            },
-												[brightcove] Extract m3u8 formats (#3541)

											
										
										
											10 years ago
+								            'skip': 'Restricted to Netherlands',
-												[generic/brightcove] Add a new test case for kijk.nl (#3541)

											
										
										
											10 years ago
+								            'params': {
-												[brightcove] Extract m3u8 formats (#3541)

											
										
										
											10 years ago
+								                'skip_download': True,  # m3u8 download
-												[generic/brightcove] Add a new test case for kijk.nl (#3541)

											
										
										
											10 years ago
+								            },
 								        },
-												[generic] Detect ooyala videos (fixes #2013)

											
										
										
											11 years ago
+								        # ooyala video
 								        {
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											10 years ago
+								            'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
-												[generic] Update some tests

											
										
										
											9 years ago
+								            'md5': '166dd577b433b4d4ebfee10b0824d8ff',
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											10 years ago
+								            'info_dict': {
 								                'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
 								                'ext': 'mp4',
-												[generic] Improve testcase

											
										
										
											10 years ago
+								                'title': '2cc213299525360.mov',  # that's what we get
-												[generic] Detect ooyala videos (fixes #2013)

											
										
										
											11 years ago
+								            },
-												[generic] Update some tests

											
										
										
											9 years ago
+								            'add_ie': ['Ooyala'],
-												[generic] Detect ooyala videos (fixes #2013)

											
										
										
											11 years ago
+								        },
-												[generic/ooyala] Add support for Ooyala embeds on SBN network websites (Fixes #4859)

											
										
										
											9 years ago
+								        # multiple ooyala embeds on SBN network websites
 								        {
 								            'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 								            'info_dict': {
 								                'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
 								                'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
 								            },
 								            'playlist_mincount': 3,
 								            'params': {
 								                'skip_download': True,
 								            },
 								            'add_ie': ['Ooyala'],
 								        },
-												Add support for embed.ly

											
										
										
											10 years ago
+								        # embed.ly video
 								        {
 								            'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
 								            'info_dict': {
 								                'id': '9ODmcdjQcHQ',
 								                'ext': 'mp4',
-												[generic] Add all test attributes for embedly (#2447)

In the future, we may want to not only print something, but throw an error for untested properties.

											
										
										
											10 years ago
+								                'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
 								                'upload_date': '20140225',
 								                'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
 								                'uploader': 'Tested',
 								                'uploader_id': 'testedcom',
-												Add support for embed.ly

											
										
										
											10 years ago
+								            },
 								            # No need to test YoutubeIE here
 								            'params': {
 								                'skip_download': True,
 								            },
 								        },
-												[generic/funnyordie] Add support for funnyordie embeds (Fixes #2546)

											
										
										
											10 years ago
+								        # funnyordie embed
 								        {
 								            'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
 								            'info_dict': {
 								                'id': '18e820ec3f',
 								                'ext': 'mp4',
 								                'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
 								                'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
-												[generic] Add support for embedded rutv player

											
										
										
											10 years ago
+								            },
-												[generic/funnyordie] Add support for funnyordie embeds (Fixes #2546)

											
										
										
											10 years ago
+								        },
-												[generic] Add BBC iPlayer playlist test

											
										
										
											9 years ago
+								        # BBC iPlayer embeds
 								        {
 								            'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
 								            'info_dict': {
 								                'title': 'BBC - Blogs -  Adam Curtis - BUGGER',
 								            },
 								            'playlist_mincount': 18,
 								        },
-												[generic] Add support for embedded rutv player

											
										
										
											10 years ago
+								        # RUTV embed
 								        {
 								            'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
 								            'info_dict': {
 								                'id': '776940',
 								                'ext': 'mp4',
 								                'title': 'Охотское море стало целиком российским',
 								                'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
 								            },
 								            'params': {
 								                # m3u8 download
 								                'skip_download': True,
 								            },
-												[ted] Simplify embed code (#2587)

											
										
										
											10 years ago
+								        },
-												[extractor/generic] Add test for tvc embed

											
										
										
											9 years ago
+								        # TVC embed
 								        {
 								            'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
 								            'info_dict': {
 								                'id': '55304',
 								                'ext': 'mp4',
 								                'title': 'Дошкольное воспитание',
 								            },
 								        },
-												[generic] Add test for sportbox embeds

											
										
										
											9 years ago
+								        # SportBox embed
 								        {
 								            'url': 'http://www.vestifinance.ru/articles/25753',
 								            'info_dict': {
 								                'id': '25753',
 								                'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
 								            },
 								            'playlist': [{
 								                'info_dict': {
 								                    'id': '370908',
 								                    'title': 'Госзаказ. День 3',
 								                    'ext': 'mp4',
 								                }
 								            }, {
 								                'info_dict': {
 								                    'id': '370905',
 								                    'title': 'Госзаказ. День 2',
 								                    'ext': 'mp4',
 								                }
 								            }, {
 								                'info_dict': {
 								                    'id': '370902',
 								                    'title': 'Госзаказ. День 1',
 								                    'ext': 'mp4',
 								                }
 								            }],
 								            'params': {
 								                # m3u8 download
 								                'skip_download': True,
 								            },
 								        },
-												[extractor/generic] Add test for myvi embed

											
										
										
											9 years ago
+								        # Myvi.ru embed
 								        {
 								            'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
 								            'info_dict': {
 								                'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
 								                'ext': 'mp4',
 								                'title': 'Ужастики, русский трейлер (2015)',
 								                'thumbnail': 're:^https?://.*\.jpg$',
 								                'duration': 153,
 								            }
 								        },
-												[extractor/generic] Add test for xhamster embed

											
										
										
											9 years ago
+								        # XHamster embed
 								        {
 								            'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
 								            'info_dict': {
 								                'id': 'showthread',
 								                'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
 								            },
 								            'playlist_mincount': 7,
 								        },
-												[ted] Simplify embed code (#2587)

											
										
										
											10 years ago
+								        # Embedded TED video
 								        {
 								            'url': 'http://en.support.wordpress.com/videos/ted-talks/',
-												[generic] Fix testcases

											
										
										
											10 years ago
+								            'md5': '65fdff94098e4a607385a60c5177c638',
-												[ted] Simplify embed code (#2587)

											
										
										
											10 years ago
+								            'info_dict': {
-												[generic] Fix testcases

											
										
										
											10 years ago
+								                'id': '1969',
-												[ted] Simplify embed code (#2587)

											
										
										
											10 years ago
+								                'ext': 'mp4',
-												[generic] Fix testcases

											
										
										
											10 years ago
+								                'title': 'Hidden miracles of the natural world',
 								                'uploader': 'Louie Schwartzberg',
 								                'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
-												[ted] Simplify embed code (#2587)

											
										
										
											10 years ago
+								            }
-												[generic/funnyordie] Add support for funnyordie embeds (Fixes #2546)

											
										
										
											10 years ago
+								        },
-												[UstreamIE] [generic] Added support for Ustream embed URLs (Fixes #2694)

											
										
										
											10 years ago
+								        # Embedded Ustream video
 								        {
 								            'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
 								            'md5': '27b99cdb639c9b12a79bca876a073417',
 								            'info_dict': {
-												Fix _TEST for Ustream embed URLs

											
										
										
											10 years ago
+								                'id': '45734260',
 								                'ext': 'flv',
 								                'uploader': 'AU SPA:  The NSA and Privacy',
-												[UstreamIE] [generic] Added support for Ustream embed URLs (Fixes #2694)

											
										
										
											10 years ago
+								                'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
 								            }
 								        },
-												[generic] Add nowvideo test hidden behind percent encoding

											
										
										
											10 years ago
+								        # nowvideo embed hidden behind percent encoding
 								        {
 								            'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
 								            'md5': '2baf4ddd70f697d94b1c18cf796d5107',
 								            'info_dict': {
 								                'id': '06e53103ca9aa',
 								                'ext': 'flv',
 								                'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
 								                'description': 'No description',
 								            },
-												Merge remote-tracking branch 'dstftw/generic-webpage-unescape'

Conflicts:
	youtube_dl/extractor/generic.py

											
										
										
											10 years ago
+								        },
-												[arte] Add support for embedded videos (Fixes #2620)

											
										
										
											10 years ago
+								        # arte embed
 								        {
 								            'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
 								            'md5': '7653032cbb25bf6c80d80f217055fa43',
 								            'info_dict': {
 								                'id': '048195-004_PLUS7-F',
 								                'ext': 'flv',
 								                'title': 'X:enius',
 								                'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
 								                'upload_date': '20140320',
 								            },
 								            'params': {
 								                'skip_download': 'Requires rtmpdump'
 								            }
 								        },
-												[extractor/generic] Add support for francetv embeds

											
										
										
											9 years ago
+								        # francetv embed
 								        {
 								            'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
 								            'info_dict': {
 								                'id': 'EV_30231',
 								                'ext': 'mp4',
 								                'title': 'Alcaline, le concert avec Calogero',
 								                'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
 								                'upload_date': '20150226',
 								                'timestamp': 1424989860,
 								                'duration': 5400,
 								            },
 								            'params': {
 								                # m3u8 downloads
 								                'skip_download': True,
 								            },
 								            'expected_warnings': [
 								                'Forbidden'
 								            ]
 								        },
-												[condenast|generic] Add support for condenast embeds (Fixes #2783)

											
										
										
											10 years ago
+								        # Condé Nast embed
 								        {
 								            'url': 'http://www.wired.com/2014/04/honda-asimo/',
 								            'md5': 'ba0dfe966fa007657bd1443ee672db0f',
 								            'info_dict': {
 								                'id': '53501be369702d3275860000',
 								                'ext': 'mp4',
 								                'title': 'Honda’s  New Asimo Robot Is More Human Than Ever',
 								            }
-												[generic] Add support for protocol-independent URLs (Fixes #2810)

											
										
										
											10 years ago
+								        },
 								        # Dailymotion embed
 								        {
 								            'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
 								            'md5': '441aeeb82eb72c422c7f14ec533999cd',
 								            'info_dict': {
 								                'id': 'k2mm4bCdJ6CQ2i7c8o2',
 								                'ext': 'mp4',
 								                'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
 								                'uploader': 'Spi0n',
 								            },
 								            'add_ie': ['Dailymotion'],
-												[generic] Add support for <embed YouTube

											
										
										
											10 years ago
+								        },
 								        # YouTube embed
 								        {
 								            'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
 								            'info_dict': {
 								                'id': 'FXRb4ykk4S0',
 								                'ext': 'mp4',
 								                'title': 'The NBL Auction 2014',
 								                'uploader': 'BADMINTON England',
 								                'uploader_id': 'BADMINTONEvents',
 								                'upload_date': '20140603',
 								                'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
 								            },
 								            'add_ie': ['Youtube'],
 								            'params': {
 								                'skip_download': True,
 								            }
 								        },
-												[generic] Extract mtvservices embedded videos

											
										
										
											10 years ago
+								        # MTVServices embed
 								        {
 								            'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
 								            'md5': '35727f82f58c76d996fc188f9755b0d5',
 								            'info_dict': {
 								                'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
 								                'ext': 'mp4',
 								                'title': 'Review',
 								                'description': 'Mario\'s life in the fast lane has never looked so good.',
 								            },
 								        },
-												Add a _TEST_

											
										
										
											10 years ago
+								        # YouTube embed via <data-embed-url="">
 								        {
 								            'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
 								            'info_dict': {
-												[generic] Fix testcases

											
										
										
											10 years ago
+								                'id': '4vAffPZIT44',
-												Add a _TEST_

											
										
										
											10 years ago
+								                'ext': 'mp4',
-												[generic] Fix testcases

											
										
										
											10 years ago
+								                'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
-												[generic] Simplify playlist support (#2948)

											
										
										
											10 years ago
+								                'uploader': 'Gameloft',
 								                'uploader_id': 'gameloft',
-												[generic] Fix testcases

											
										
										
											10 years ago
+								                'upload_date': '20140828',
 								                'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
-												[generic] Simplify playlist support (#2948)

											
										
										
											10 years ago
+								            },
 								            'params': {
 								                'skip_download': True,
-												Add a _TEST_

											
										
										
											10 years ago
+								            }
-												[generic] Add support for camtasia videos (Fixes #3574)

											
										
										
											10 years ago
+								        },
 								        # Camtasia studio
 								        {
 								            'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 								            'playlist': [{
 								                'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
 								                'info_dict': {
 								                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 								                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
 								                    'ext': 'flv',
 								                    'duration': 2235.90,
 								                }
 								            }, {
 								                'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
 								                'info_dict': {
 								                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
 								                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
 								                    'ext': 'flv',
 								                    'duration': 2235.93,
 								                }
 								            }],
 								            'info_dict': {
 								                'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
 								            }
-												[generic] Automatic detection of flow player and age_limit (Fixes #3576)

											
										
										
											10 years ago
+								        },
 								        # Flowplayer
 								        {
 								            'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
 								            'md5': '9d65602bf31c6e20014319c7d07fba27',
 								            'info_dict': {
 								                'id': '5123ea6d5e5a7',
 								                'ext': 'mp4',
 								                'age_limit': 18,
 								                'uploader': 'www.handjobhub.com',
-												[generic] Fix test title

											
										
										
											10 years ago
+								                'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
-												[generic] Automatic detection of flow player and age_limit (Fixes #3576)

											
										
										
											10 years ago
+								            }
-												[generic] Fix rss under Python 2.x and move test to extractor

											
										
										
											10 years ago
+								        },
-												Move playlist tests to extractors.

From now on, test_download will run these tests. That means we benefit not only from the networking setup in there, but also from the other tests (for example test_all_urls to find problems with _VALID_URLs).

											
										
										
											10 years ago
+								        # Multiple brightcove videos
 								        # https://github.com/rg3/youtube-dl/issues/2283
 								        {
 								            'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
 								            'info_dict': {
 								                'id': 'always-never',
 								                'title': 'Always / Never - The New Yorker',
 								            },
 								            'playlist_count': 3,
 								            'params': {
 								                'extract_flat': False,
 								                'skip_download': True,
 								            }
-												[mlb] Add support for embedded videos (Closes #3653)

											
										
										
											10 years ago
+								        },
 								        # MLB embed
 								        {
 								            'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
 								            'md5': '96f09a37e44da40dd083e12d9a683327',
 								            'info_dict': {
 								                'id': '33322633',
 								                'ext': 'mp4',
 								                'title': 'Ump changes call to ball',
 								                'description': 'md5:71c11215384298a172a6dcb4c2e20685',
 								                'duration': 48,
 								                'timestamp': 1401537900,
 								                'upload_date': '20140531',
 								                'thumbnail': 're:^https?://.*\.jpg$',
 								            },
 								        },
-												[wistia] Use API and make more generic

											
										
										
											10 years ago
+								        # Wistia embed
 								        {
 								            'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
 								            'md5': '8788b683c777a5cf25621eaf286d0c23',
 								            'info_dict': {
 								                'id': '1cfaf6b7ea',
 								                'ext': 'mov',
 								                'title': 'md5:51364a8d3d009997ba99656004b5e20d',
 								                'duration': 643.0,
 								                'filesize': 182808282,
 								                'uploader': 'education-portal.com',
 								            },
 								        },
-												[thoughtworks] wistia support added

											
										
										
											10 years ago
+								        {
 								            'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
 								            'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
 								            'info_dict': {
 								                'id': 'uxjb0lwrcz',
 								                'ext': 'mp4',
-												[thoughtworks] wistia regex modified

											
										
										
											10 years ago
+								                'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
-												[thoughtworks] wistia support added

											
										
										
											10 years ago
+								                'duration': 1715.0,
-												[thoughtworks] wistia regex modified

											
										
										
											10 years ago
+								                'uploader': 'thoughtworks.wistia.com',
-												[generic] Add a test case for direct links with broken HEAD (#4032)

											
										
										
											10 years ago
+								            },
-												[thoughtworks] wistia support added

											
										
										
											10 years ago
+								        },
-												[generic] Allow soundcloud embeds with additional attributes

											
										
										
											10 years ago
+								        # Soundcloud embed
 								        {
 								            'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
 								            'info_dict': {
 								                'id': '174391317',
 								                'ext': 'mp3',
 								                'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
 								                'uploader': 'Sophos Security',
 								                'title': 'Chet Chat 171 - Oct 29, 2014',
 								                'upload_date': '20141029',
 								            }
-												[generic] Add support for livestream embeds (Fixes #4185)

											
										
										
											10 years ago
+								        },
 								        # Livestream embed
 								        {
 								            'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
 								            'info_dict': {
 								                'id': '67864563',
 								                'ext': 'flv',
 								                'upload_date': '20141112',
 								                'title': 'Rosetta #CometLanding webcast HL 10',
 								            }
 								        },
-												[generic] Add support for LazyYT embeds (Fixes #4306)

											
										
										
											10 years ago
+								        # LazyYT
 								        {
 								            'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
 								            'info_dict': {
-												Extend various playlist tests

											
										
										
											9 years ago
+								                'id': '1986',
-												[generic] Add support for LazyYT embeds (Fixes #4306)

											
										
										
											10 years ago
+								                'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
 								            },
 								            'playlist_mincount': 2,
-												[generic] Detect direct video links (Fixes #4149, #4313)

											
										
										
											10 years ago
+								        },
-												[cinchcast] Add new extractor (Fixes #4428)

											
										
										
											10 years ago
+								        # Cinchcast embed
 								        {
 								            'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
 								            'info_dict': {
 								                'id': '7141703',
 								                'ext': 'mp3',
 								                'upload_date': '20141126',
 								                'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
 								            }
 								        },
-												[generic] Add support for Cinerama player (Fixes #4752)

											
										
										
											9 years ago
+								        # Cinerama player
 								        {
 								            'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
 								            'info_dict': {
 								                'id': '730m_DandD_1901_512k',
 								                'ext': 'mp4',
 								                'uploader': 'www.abc.net.au',
 								                'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
 								            }
-												fixed viddler support - needed a Referer header; also added a viddler
generic extractor

											
										
										
											9 years ago
+								        },
 								        # embedded viddler video
 								        {
 								            'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
 								            'info_dict': {
 								                'id': '4d03aad9',
 								                'ext': 'mp4',
 								                'uploader': 'deadspin',
 								                'title': 'WALL-TO-GORTAT',
 								                'timestamp': 1422285291,
 								                'upload_date': '20150126',
 								            },
 								            'add_ie': ['Viddler'],
-												[generic] Add support for jwPlayer YouTube videos

This makes nationalarchives.gov.uk work (Fixes #4907, fixes #4876)

											
										
										
											9 years ago
+								        },
-												[extractor/generic] Add test for Libsyn embed

											
										
										
											9 years ago
+								        # Libsyn embed
 								        {
 								            'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
 								            'info_dict': {
 								                'id': '3377616',
 								                'ext': 'mp3',
 								                'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
 								                'description': 'md5:601cb790edd05908957dae8aaa866465',
 								                'upload_date': '20150220',
 								            },
 								        },
-												[generic] Add support for jwPlayer YouTube videos

This makes nationalarchives.gov.uk work (Fixes #4907, fixes #4876)

											
										
										
											9 years ago
+								        # jwplayer YouTube
 								        {
 								            'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
 								            'info_dict': {
 								                'id': 'Mrj4DVp2zeA',
 								                'ext': 'mp4',
-												[generic] Correct test case

Video has been reuploaded / edited

											
										
										
											9 years ago
+								                'upload_date': '20150212',
-												[generic] Add support for jwPlayer YouTube videos

This makes nationalarchives.gov.uk work (Fixes #4907, fixes #4876)

											
										
										
											9 years ago
+								                'uploader': 'The National Archives UK',
 								                'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
 								                'uploader_id': 'NationalArchives08',
 								                'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
 								            },
-												[rtlnl|generic] Add support for rtl.nl embeds (Fixes #4959)

											
										
										
											9 years ago
+								        },
 								        # rtl.nl embed
 								        {
 								            'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
 								            'playlist_mincount': 5,
 								            'info_dict': {
 								                'id': 'aanslagen-kopenhagen',
 								                'title': 'Aanslagen Kopenhagen | RTL Nieuws',
 								            }
-												[generic] Add support for Zapiks embeds (#5014)

											
										
										
											9 years ago
+								        },
 								        # Zapiks embed
 								        {
 								            'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
 								            'info_dict': {
 								                'id': '118046',
 								                'ext': 'mp4',
 								                'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
 								            }
 								        },
-												[generic] Support dynamic Kaltura embeds (#5016) (#5073)

											
										
										
											9 years ago
+								        # Kaltura embed
 								        {
 								            'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
 								            'info_dict': {
 								                'id': '1_eergr3h1',
 								                'ext': 'mp4',
 								                'upload_date': '20150226',
 								                'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
 								                'timestamp': int,
 								                'title': 'John Carlson Postgame 2/25/15',
 								            },
 								        },
-												[extractor/generic] Improve kaltura embeds support (Closes #6137)

											
										
										
											9 years ago
+								        # Kaltura embed (different embed code)
 								        {
 								            'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
 								            'info_dict': {
 								                'id': '1_a52wc67y',
 								                'ext': 'flv',
 								                'upload_date': '20150127',
 								                'uploader_id': 'PremierMedia',
 								                'timestamp': int,
 								                'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
 								            },
 								        },
-												[eagleplatform] Add support for embeds

											
										
										
											9 years ago
+								        # Eagle.Platform embed (generic URL)
 								        {
 								            'url': 'http://lenta.ru/news/2015/03/06/navalny/',
 								            'info_dict': {
 								                'id': '227304',
 								                'ext': 'mp4',
 								                'title': 'Навальный вышел на свободу',
 								                'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
 								                'thumbnail': 're:^https?://.*\.jpg$',
 								                'duration': 87,
 								                'view_count': int,
 								                'age_limit': 0,
 								            },
 								        },
-												[eagleplatform] Add support for ClipYou embeds

											
										
										
											9 years ago
+								        # ClipYou (Eagle.Platform) embed (custom URL)
 								        {
 								            'url': 'http://muz-tv.ru/play/7129/',
 								            'info_dict': {
 								                'id': '12820',
 								                'ext': 'mp4',
 								                'title': "'O Sole Mio",
 								                'thumbnail': 're:^https?://.*\.jpg$',
 								                'duration': 216,
 								                'view_count': int,
 								            },
 								        },
-												[pladform] Add support for embeds

											
										
										
											9 years ago
+								        # Pladform embed
 								        {
 								            'url': 'http://muz-tv.ru/kinozal/view/7400/',
 								            'info_dict': {
 								                'id': '100183293',
 								                'ext': 'mp4',
-												[generic] Update pladform embed test

											
										
										
											9 years ago
+								                'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
-												[pladform] Add support for embeds

											
										
										
											9 years ago
+								                'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
 								                'thumbnail': 're:^https?://.*\.jpg$',
 								                'duration': 694,
 								                'age_limit': 0,
 								            },
 								        },
-												[generic] Add test for playwire embed (#5430)

											
										
										
											9 years ago
+								        # Playwire embed
 								        {
 								            'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
 								            'info_dict': {
 								                'id': '3519514',
 								                'ext': 'mp4',
 								                'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
 								                'thumbnail': 're:^https?://.*\.png$',
 								                'duration': 45.115,
 								            },
 								        },
-												[generic] Add support for 5min embeds (#5310)

											
										
										
											9 years ago
+								        # 5min embed
 								        {
 								            'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
 								            'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
 								            'info_dict': {
 								                'id': '518726732',
 								                'ext': 'mp4',
 								                'title': 'Facebook Creates "On This Day" | Crunch Report',
 								            },
 								        },
-												[extractor/generic] Add test for svt embed

											
										
										
											9 years ago
+								        # SVT embed
 								        {
 								            'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
 								            'info_dict': {
 								                'id': '2900353',
 								                'ext': 'flv',
 								                'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
 								                'duration': 27,
 								                'age_limit': 0,
 								            },
 								        },
-												[generic] Add tests for Crooks and Liars embeds

											
										
										
											9 years ago
+								        # Crooks and Liars embed
 								        {
 								            'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
 								            'info_dict': {
 								                'id': '8RUoRhRi',
 								                'ext': 'mp4',
 								                'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
 								                'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
 								                'timestamp': 1428207000,
 								                'upload_date': '20150405',
 								                'uploader': 'Heather',
 								            },
 								        },
 								        # Crooks and Liars external embed
 								        {
 								            'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
 								            'info_dict': {
 								                'id': 'MTE3MjUtMzQ2MzA',
 								                'ext': 'mp4',
 								                'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
 								                'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
 								                'timestamp': 1265032391,
 								                'upload_date': '20100201',
 								                'uploader': 'Heather',
 								            },
 								        },
-												[generic] Add working NBC Sports vplayer test

											
										
										
											9 years ago
+								        # NBC Sports vplayer embed
-												[NBC/ThePlatform/Generic] Add a generic detector for NBCSportsVPlayer and enhance error detection in ThePlatformIE

											
										
										
											9 years ago
+								        {
-												[generic] Add working NBC Sports vplayer test

											
										
										
											9 years ago
+								            'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
-												[NBC/ThePlatform/Generic] Add a generic detector for NBCSportsVPlayer and enhance error detection in ThePlatformIE

											
										
										
											9 years ago
+								            'info_dict': {
-												[generic] Add working NBC Sports vplayer test

											
										
										
											9 years ago
+								                'id': 'ln7x1qSThw4k',
 								                'ext': 'flv',
 								                'title': "PFT Live: New leader in the 'new-look' defense",
 								                'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
-												[NBC/ThePlatform/Generic] Add a generic detector for NBCSportsVPlayer and enhance error detection in ThePlatformIE

											
										
										
											9 years ago
+								            },
-												[udn] Add new extractor

											
										
										
											9 years ago
+								        },
 								        # UDN embed
 								        {
 								            'url': 'http://www.udn.com/news/story/7314/822787',
-												[generic] Fix test generic_51

The website replaced the original video with a new one

											
										
										
											9 years ago
+								            'md5': 'fd2060e988c326991037b9aff9df21a6',
-												[udn] Add new extractor

											
										
										
											9 years ago
+								            'info_dict': {
-												[generic] Fix test generic_51

The website replaced the original video with a new one

											
										
										
											9 years ago
+								                'id': '300346',
-												[udn] Add new extractor

											
										
										
											9 years ago
+								                'ext': 'mp4',
-												[generic] Fix test generic_51

The website replaced the original video with a new one

											
										
										
											9 years ago
+								                'title': '中一中男師變性 全校師生力挺',
-												[udn] Add new extractor

											
										
										
											9 years ago
+								                'thumbnail': 're:^https?://.*\.jpg$',
 								            }
-												[generic] Support another type of Ooyala embedded video

											
										
										
											9 years ago
+								        },
 								        # Ooyala embed
 								        {
 								            'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
 								            'info_dict': {
 								                'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
 								                'ext': 'mp4',
 								                'description': 'VIDEO: Index/Match versus VLOOKUP.',
 								                'title': 'This is what separates the Excel masters from the wannabes',
 								            },
 								            'params': {
 								                # m3u8 downloads
 								                'skip_download': True,
 								            }
-												[generic] Extract videos from SMIL manifests (closes #5145 and fixes #5135)

											
										
										
											9 years ago
+								        },
 								        # Contains a SMIL manifest
 								        {
 								            'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
 								            'info_dict': {
 								                'id': 'file',
 								                'ext': 'flv',
 								                'title': '+ Football: Lottery Champions League Europe',
 								                'uploader': 'www.telewebion.com',
 								            },
 								            'params': {
 								                # rtmpe downloads
 								                'skip_download': True,
 								            }
-												[brightcove] Allow single quotes in Brightcove URLs (fixes #5901)

											
										
										
											9 years ago
+								        },
 								        # Brightcove URL in single quotes
 								        {
 								            'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
 								            'md5': '4ae374f1f8b91c889c4b9203c8c752af',
 								            'info_dict': {
 								                'id': '4255764656001',
 								                'ext': 'mp4',
 								                'title': 'SN Presents: Russell Martin, World Citizen',
 								                'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
 								                'uploader': 'Rogers Sportsnet',
 								            },
-												[dailymotion/generic] Add DailymotionCloudIE

											
										
										
											9 years ago
+								        },
 								        # Dailymotion Cloud video
 								        {
 								            'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
 								            'md5': '49444254273501a64675a7e68c502681',
 								            'info_dict': {
 								                'id': '5585de919473990de4bee11b',
 								                'ext': 'mp4',
 								                'title': 'Le débat',
 								                'thumbnail': 're:^https?://.*\.jpe?g$',
 								            }
-												[generic/adobetv] Support AdobeTVVideo embeds (#6039)

											
										
										
											9 years ago
+								        },
-												[extractor/generic] Add test for OnionStudios embeds

											
										
										
											9 years ago
+								        # OnionStudios embed
 								        {
 								            'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
 								            'info_dict': {
 								                'id': '2855',
 								                'ext': 'mp4',
 								                'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
 								                'thumbnail': 're:^https?://.*\.jpe?g$',
 								                'uploader': 'ClickHole',
 								                'uploader_id': 'clickhole',
 								            }
 								        },
-												[extractor/generic] Add test for snagfilms embeds

											
										
										
											9 years ago
+								        # SnagFilms embed
 								        {
 								            'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
 								            'info_dict': {
 								                'id': '74849a00-85a9-11e1-9660-123139220831',
 								                'ext': 'mp4',
 								                'title': '#whilewewatch',
 								            }
 								        },
-												[generic/adobetv] Support AdobeTVVideo embeds (#6039)

											
										
										
											9 years ago
+								        # AdobeTVVideo embed
 								        {
 								            'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
 								            'md5': '43662b577c018ad707a63766462b1e87',
 								            'info_dict': {
 								                'id': '2456',
 								                'ext': 'mp4',
 								                'title': 'New experience with Acrobat DC',
 								                'description': 'New experience with Acrobat DC',
 								                'duration': 248.667,
 								            },
-												[generic] Parse RSS enclosure URLs (Fixes #5091)

											
										
										
											9 years ago
+								        }
-												GenericIE: Detect videos from Brightcove

Brightcove videos info is usually found in an <object class="BrightcoveExperience"></object> node, this is passed to a new method of BrightcoveIE that builds a url to extract the video.

											
										
										
											11 years ago
+								    ]
-												Move GenericIE into its own file

											
										
										
											11 years ago
 								    def report_following_redirect(self, new_url):
 								        """Report information extraction."""
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											10 years ago
+								        self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
-												Move GenericIE into its own file

											
										
										
											11 years ago
-												[generic] Add support for RSS feeds (Fixes #667)

											
										
										
											10 years ago
+								    def _extract_rss(self, url, video_id, doc):
 								        playlist_title = doc.find('./channel/title').text
 								        playlist_desc_el = doc.find('./channel/description')
 								        playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
-												[generic] Parse RSS enclosure URLs (Fixes #5091)

											
										
										
											9 years ago
+								        entries = []
 								        for it in doc.findall('./channel/item'):
 								            next_url = xpath_text(it, 'link', fatal=False)
 								            if not next_url:
 								                enclosure_nodes = it.findall('./enclosure')
 								                for e in enclosure_nodes:
 								                    next_url = e.attrib.get('url')
 								                    if next_url:
 								                        break
 								            if not next_url:
 								                continue
 								            entries.append({
 								                '_type': 'url',
 								                'url': next_url,
 								                'title': it.find('title').text,
 								            })
-												[generic] Add support for RSS feeds (Fixes #667)

											
										
										
											10 years ago
 								        return {
 								            '_type': 'playlist',
 								            'id': url,
 								            'title': playlist_title,
 								            'description': playlist_desc,
 								            'entries': entries,
 								        }
-												[generic] Add support for camtasia videos (Fixes #3574)

											
										
										
											10 years ago
+								    def _extract_camtasia(self, url, video_id, webpage):
 								        """ Returns None if no camtasia video can be found. """
 								        camtasia_cfg = self._search_regex(
 								            r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
 								            webpage, 'camtasia configuration file', default=None)
 								        if camtasia_cfg is None:
 								            return None
 								        title = self._html_search_meta('DC.title', webpage, fatal=True)
 								        camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
 								        camtasia_cfg = self._download_xml(
 								            camtasia_url, video_id,
 								            note='Downloading camtasia configuration',
 								            errnote='Failed to download camtasia configuration')
 								        fileset_node = camtasia_cfg.find('./playlist/array/fileset')
 								        entries = []
 								        for n in fileset_node.getchildren():
 								            url_n = n.find('./uri')
 								            if url_n is None:
 								                continue
 								            entries.append({
 								                'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
 								                'title': '%s - %s' % (title, n.tag),
 								                'url': compat_urlparse.urljoin(url, url_n.text),
 								                'duration': float_or_none(n.find('./duration').text),
 								            })
 								        return {
 								            '_type': 'playlist',
 								            'entries': entries,
 								            'title': title,
 								        }
-												Move GenericIE into its own file

											
										
										
											11 years ago
+								    def _real_extract(self, url):
-												[generic] Add support for protocol-independent URLs (Fixes #2810)

											
										
										
											10 years ago
+								        if url.startswith('//'):
 								            return {
 								                '_type': 'url',
-												[soundcloud/generic] Add support for playlists

											
										
										
											10 years ago
+								                'url': self.http_scheme() + url,
-												[generic] Add support for protocol-independent URLs (Fixes #2810)

											
										
										
											10 years ago
+								            }
-												[generic] If the url doesn't specify the protocol, then try to extract prepending 'http://'

											
										
										
											11 years ago
+								        parsed_url = compat_urlparse.urlparse(url)
 								        if not parsed_url.scheme:
-												Add new --default-search option (#2193)

											
										
										
											10 years ago
+								            default_search = self._downloader.params.get('default_search')
 								            if default_search is None:
-												[generic] Add --default-search fixup_error

This restores the ability to enter URLs without a scheme (and default to http), but still fail if the input is a search term.

											
										
										
											10 years ago
+								                default_search = 'fixup_error'
-												Add new --default-search option (#2193)

											
										
										
											10 years ago
-												[generic] Add --default-search fixup_error

This restores the ability to enter URLs without a scheme (and default to http), but still fail if the input is a search term.

											
										
										
											10 years ago
+								            if default_search in ('auto', 'auto_warning', 'fixup_error'):
-												Add new --default-search option (#2193)

											
										
										
											10 years ago
+								                if '/' in url:
 								                    self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
 								                    return self.url_result('http://' + url)
-												[generic] Add --default-search fixup_error

This restores the ability to enter URLs without a scheme (and default to http), but still fail if the input is a search term.

											
										
										
											10 years ago
+								                elif default_search != 'fixup_error':
-												[generic] Warn before fallback to automatic search

											
										
										
											10 years ago
+								                    if default_search == 'auto_warning':
-												[generic] Abort if user passes in URL "url" (#2942)

											
										
										
											10 years ago
+								                        if re.match(r'^(?:url|URL)$', url):
 								                            raise ExtractorError(
 								                                'Invalid URL:  %r . Call youtube-dl like this:  youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc"  ' % url,
 								                                expected=True)
 								                        else:
 								                            self._downloader.report_warning(
-												[generic] Set default-search to error

This prevents users from submitting bug reports where they mistyped a URL, and prevents me from getting a weird video when holding shift and thus searching for :Tds

											
										
										
											10 years ago
+								                                'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url)
-												Add new --default-search option (#2193)

											
										
										
											10 years ago
+								                    return self.url_result('ytsearch:' + url)
-												[generic] Add --default-search fixup_error

This restores the ability to enter URLs without a scheme (and default to http), but still fail if the input is a search term.

											
										
										
											10 years ago
 								            if default_search in ('error', 'fixup_error'):
-												[generic] Set default-search to error

This prevents users from submitting bug reports where they mistyped a URL, and prevents me from getting a weird video when holding shift and thus searching for :Tds

											
										
										
											10 years ago
+								                raise ExtractorError(
-												Fix all PEP8 issues except E501

											
										
										
											10 years ago
+								                    '%r is not a valid URL. '
 								                    'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
 								                    % (url, url), expected=True)
-												Add new --default-search option (#2193)

											
										
										
											10 years ago
+								            else:
-												[generic] Allow --default-search without colon

											
										
										
											10 years ago
+								                if ':' not in default_search:
 								                    default_search += ':'
-												Add new --default-search option (#2193)

											
										
										
											10 years ago
+								                return self.url_result(default_search + url)
-												[ministrygrid] Add extractor (Fixes #2900)

											
										
										
											10 years ago
 								        url, smuggled_data = unsmuggle_url(url)
 								        force_videoid = None
-												[vimeo:likes] Add new extractor (Fixes #3835)

											
										
										
											10 years ago
+								        is_intentional = smuggled_data and smuggled_data.get('to_generic')
-												[ministrygrid] Add extractor (Fixes #2900)

											
										
										
											10 years ago
+								        if smuggled_data and 'force_videoid' in smuggled_data:
 								            force_videoid = smuggled_data['force_videoid']
 								            video_id = force_videoid
 								        else:
-												[extractor/generic] Use compat_urllib_parse_unquote for unquoting video_id and title from URL

											
										
										
											9 years ago
+								            video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
-												[generic] If the url doesn't specify the protocol, then try to extract prepending 'http://'

											
										
										
											11 years ago
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											10 years ago
+								        self.to_screen('%s: Requesting header' % video_id)
-												[generic] Output something before making network requests

											
										
										
											10 years ago
-												[generic] Use default opener for HEAD request (Fixes #3528)

											
										
										
											10 years ago
+								        head_req = HEADRequest(url)
-												[generic] Handle audio streams that do not implement HEAD (Fixes #4032)

											
										
										
											10 years ago
+								        head_response = self._request_webpage(
-												[generic] Use default opener for HEAD request (Fixes #3528)

											
										
										
											10 years ago
+								            head_req, video_id,
 								            note=False, errnote='Could not send HEAD request to %s' % url,
 								            fatal=False)
-												Add support for direct links to a video (#1973)

											
										
										
											11 years ago
-												[generic] Handle audio streams that do not implement HEAD (Fixes #4032)

											
										
										
											10 years ago
+								        if head_response is not False:
-												Add support for direct links to a video (#1973)

											
										
										
											11 years ago
+								            # Check for redirect
-												[generic] Handle audio streams that do not implement HEAD (Fixes #4032)

											
										
										
											10 years ago
+								            new_url = head_response.geturl()
-												Add support for direct links to a video (#1973)

											
										
										
											11 years ago
+								            if url != new_url:
 								                self.report_following_redirect(new_url)
-												[ministrygrid] Add extractor (Fixes #2900)

											
										
										
											10 years ago
+								                if force_videoid:
 								                    new_url = smuggle_url(
 								                        new_url, {'force_videoid': force_videoid})
-												[generic] Do not use compatibility result fallback

											
										
										
											11 years ago
+								                return self.url_result(new_url)
-												Add support for direct links to a video (#1973)

											
										
										
											11 years ago
-												[generic] Handle audio streams that do not implement HEAD (Fixes #4032)

											
										
										
											10 years ago
+								        full_response = None
 								        if head_response is False:
-												[extractor/generic] Force Accept-Encoding to any for extraction pass

											
										
										
											9 years ago
+								            request = compat_urllib_request.Request(url)
 								            request.add_header('Accept-Encoding', '*')
 								            full_response = self._request_webpage(request, video_id)
-												[generic] Handle audio streams that do not implement HEAD (Fixes #4032)

											
										
										
											10 years ago
+								            head_response = full_response
 								        # Check for direct link to a video
 								        content_type = head_response.headers.get('Content-Type', '')
 								        m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
 								        if m:
 								            upload_date = unified_strdate(
 								                head_response.headers.get('Last-Modified'))
 								            return {
 								                'id': video_id,
-												[extractor/generic] Use compat_urllib_parse_unquote for unquoting video_id and title from URL

											
										
										
											9 years ago
+								                'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
-												[generic] indicate when a direct video has been extracted

Fixes #4052.

											
										
										
											10 years ago
+								                'direct': True,
-												[generic] Handle audio streams that do not implement HEAD (Fixes #4032)

											
										
										
											10 years ago
+								                'formats': [{
 								                    'format_id': m.group('format_id'),
 								                    'url': url,
 								                    'vcodec': 'none' if m.group('type') == 'audio' else None
 								                }],
 								                'upload_date': upload_date,
 								            }
-												Add support for direct links to a video (#1973)

											
										
										
											11 years ago
-												[vimeo:likes] Add new extractor (Fixes #3835)

											
										
										
											10 years ago
+								        if not self._downloader.params.get('test', False) and not is_intentional:
-												[extractor/generic] Clarify generic extraction warning

											
										
										
											9 years ago
+								            force = self._downloader.params.get('force_generic_extractor', False)
 								            self._downloader.report_warning(
 								                '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
-												[vimeo:likes] Add new extractor (Fixes #3835)

											
										
										
											10 years ago
-												[generic] Detect direct video links (Fixes #4149, #4313)

											
										
										
											10 years ago
+								        if not full_response:
-												[extractor/generic] Force Accept-Encoding to any for extraction pass

											
										
										
											9 years ago
+								            request = compat_urllib_request.Request(url)
 								            # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
 								            # making it impossible to download only chunk of the file (yet we need only 512kB to
 								            # test whether it's HTML or not). According to youtube-dl default Accept-Encoding
 								            # that will always result in downloading the whole file that is not desirable.
 								            # Therefore for extraction pass we have to override Accept-Encoding to any in order
 								            # to accept raw bytes and being able to download only a chunk.
 								            # It may probably better to solve this by checking Content-Type for application/octet-stream
 								            # after HEAD request finishes, but not sure if we can rely on this.
 								            request.add_header('Accept-Encoding', '*')
 								            full_response = self._request_webpage(request, video_id)
-												[generic] Detect direct video links (Fixes #4149, #4313)

											
										
										
											10 years ago
 								        # Maybe it's a direct link to a video?
 								        # Be careful not to download the whole thing!
 								        first_bytes = full_response.read(512)
-												[generic] Add support for BOMs (Fixes #4753)

											
										
										
											9 years ago
+								        if not is_html(first_bytes):
-												[generic] Detect direct video links (Fixes #4149, #4313)

											
										
										
											10 years ago
+								            self._downloader.report_warning(
 								                'URL could be a direct video link, returning it as such.')
 								            upload_date = unified_strdate(
 								                head_response.headers.get('Last-Modified'))
 								            return {
 								                'id': video_id,
-												[extractor/generic] Use compat_urllib_parse_unquote for unquoting video_id and title from URL

											
										
										
											9 years ago
+								                'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
-												[generic] Detect direct video links (Fixes #4149, #4313)

											
										
										
											10 years ago
+								                'direct': True,
 								                'url': url,
 								                'upload_date': upload_date,
 								            }
 								        webpage = self._webpage_read_content(
 								            full_response, url, video_id, prefix=first_bytes)
-												Move GenericIE into its own file

											
										
										
											11 years ago
+								        self.report_extraction(video_id)
-												Support multiple embedded YouTube URLs (Fixes #1787)

											
										
										
											11 years ago
-												[generic] Add support for RSS feeds (Fixes #667)

											
										
										
											10 years ago
+								        # Is it an RSS feed?
 								        try:
-												[generic] Suppress warning about doctypes in RSS parser

											
										
										
											10 years ago
+								            doc = parse_xml(webpage)
-												[generic] Add support for RSS feeds (Fixes #667)

											
										
										
											10 years ago
+								            if doc.tag == 'rss':
 								                return self._extract_rss(url, video_id, doc)
-												[generic] Fix on python 2.6

`ParseError` is not available, it raises `xml.parsers.expat.ExpatError`.
The webpage needs to be encoded.

											
										
										
											10 years ago
+								        except compat_xml_parse_error:
-												[generic] Add support for RSS feeds (Fixes #667)

											
										
										
											10 years ago
+								            pass
-												[generic] Add support for camtasia videos (Fixes #3574)

											
										
										
											10 years ago
+								        # Is it a Camtasia project?
 								        camtasia_res = self._extract_camtasia(url, video_id, webpage)
 								        if camtasia_res is not None:
 								            return camtasia_res
-												[generic] Add comment for unescaping webpage contents

											
										
										
											10 years ago
+								        # Sometimes embedded video player is hidden behind percent encoding
 								        # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
 								        # Unescaping the whole page allows those cases to be handled in a generic way
-												Generic: use compat_urllib_parse_unquote to prevent utf8 mangling
of the entire page in python 2.

-requires- fixed compat_urllib_parse_unquote

example - the following will save with a mangled playlist title,
 instead of the kanji for 'tsunami'. This affects all utf8encoded
 urls as well

youtube-dl -f18 -o '%(playlist_title)s-%(title)s.%(ext)s' \
  https://gist.githubusercontent.com/atomicdryad/fcb97465e6060fc519e1/raw/61c14c1e3a4985471dcf56c281d24d7e781a4e0e/tsunami.html

											
										
										
											9 years ago
+								        webpage = compat_urllib_parse_unquote(webpage)
-												[generic] Unescape webpage contents
											
										
										
											10 years ago
-												Support multiple embedded YouTube URLs (Fixes #1787)

											
										
										
											11 years ago
+								        # it's tempting to parse this further, but you would
 								        # have to take into account all the variations like
 								        #   Video Title - Site Name
 								        #   Site Name | Video Title
 								        #   Video Title - Tagline | Site Name
 								        # and so on and so forth; it's just not practical
-												[wistia] Add extractor

											
										
										
											11 years ago
+								        video_title = self._html_search_regex(
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											10 years ago
+								            r'(?s)<title>(.*?)</title>', webpage, 'video title',
 								            default='video')
-												[wistia] Add extractor

											
										
										
											11 years ago
-												[generic] Automatic detection of flow player and age_limit (Fixes #3576)

											
										
										
											10 years ago
+								        # Try to detect age limit automatically
 								        age_limit = self._rta_search(webpage)
 								        # And then there are the jokers who advertise that they use RTA,
 								        # but actually don't.
 								        AGE_LIMIT_MARKERS = [
 								            r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
 								        ]
 								        if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
 								            age_limit = 18
-												[wistia] Add extractor

											
										
										
											11 years ago
+								        # video uploader is domain name
 								        video_uploader = self._search_regex(
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											10 years ago
+								            r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
-												Support multiple embedded YouTube URLs (Fixes #1787)

											
										
										
											11 years ago
-												[generic] Simplify playlist support (#2948)

											
										
										
											10 years ago
+								        # Helper method
-												[generic] Make getter None by default

											
										
										
											9 years ago
+								        def _playlist_from_matches(matches, getter=None, ie=None):
-												[generic] Allow embedded YoutubePlaylists (Fixes #3821)

											
										
										
											10 years ago
+								            urlrs = orderedSet(
-												[generic] Make getter None by default

											
										
										
											9 years ago
+								                self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
-												[generic] Allow embedded YoutubePlaylists (Fixes #3821)

											
										
										
											10 years ago
+								                for m in matches)
-												[generic] Simplify playlist support (#2948)

											
										
										
											10 years ago
+								            return self.playlist_result(
 								                urlrs, playlist_id=video_id, playlist_title=video_title)
-												[generic] small typo

											
										
										
											11 years ago
+								        # Look for BrightCove:
-												[generic] Add support for multiple brightcove URLs (Fixes #2283)

											
										
										
											10 years ago
+								        bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
 								        if bc_urls:
-												[generic] Use unicode_literals instead of duplicating the u'

											
										
										
											10 years ago
+								            self.to_screen('Brightcove video detected.')
-												[generic] Add support for multiple brightcove URLs (Fixes #2283)

											
										
										
											10 years ago
+								            entries = [{
 								                '_type': 'url',
 								                'url': smuggle_url(bc_url, {'Referer': url}),
 								                'ie_key': 'Brightcove'
 								            } for bc_url in bc_urls]
 								            return {
 								                '_type': 'playlist',
 								                'title': video_title,
 								                'id': video_id,
 								                'entries': entries,
 								            }
-												GenericIE: Detect videos from Brightcove

Brightcove videos info is usually found in an <object class="BrightcoveExperience"></object> node, this is passed to a new method of BrightcoveIE that builds a url to extract the video.

											
										
										
											11 years ago
-												[rtlnl|generic] Add support for rtl.nl embeds (Fixes #4959)

											
										
										
											9 years ago
+								        # Look for embedded rtl.nl player
 								        matches = re.findall(
-												[generic] Improve rtl.nl embeds detection (Closes #5950)

											
										
										
											9 years ago
+								            r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
-												[rtlnl|generic] Add support for rtl.nl embeds (Fixes #4959)

											
										
										
											9 years ago
+								            webpage)
 								        if matches:
 								            return _playlist_from_matches(matches, ie='RtlNl')
-												[vimeo/generic] Move detection logic from GenericIE to VimeoIE

											
										
										
											9 years ago
+								        vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
 								        if vimeo_url is not None:
 								            return self.url_result(vimeo_url)
-												[vimeo/generic] Add support for embedded SWF vimeo videos

											
										
										
											11 years ago
-												[extractor/generic] Support vid.me embeds

											
										
										
											9 years ago
+								        vid_me_embed_url = self._search_regex(
 								            r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
 								            webpage, 'vid.me embed', default=None)
 								        if vid_me_embed_url is not None:
 								            return self.url_result(vid_me_embed_url, 'Vidme')
-												Check for embedded YouTube player (Fixes #1616)

											
										
										
											11 years ago
+								        # Look for embedded YouTube player
-												[generic] Support YouTube swf embed (Fixes #2010)

											
										
										
											11 years ago
+								        matches = re.findall(r'''(?x)
-												[generic] Add support for <embed YouTube

											
										
										
											10 years ago
+								            (?:
 								                <iframe[^>]+?src=|
-												Merge remote-tracking branch 'anovicecodemonkey/generic-data-video-url'

Conflicts:
	youtube_dl/extractor/generic.py

											
										
										
											10 years ago
+								                data-video-url=|
-												[generic] Add support for <embed YouTube

											
										
										
											10 years ago
+								                <embed[^>]+?src=|
-												[generic] Allow new SWFObject()-style imports

This embed style is used on http://www.bitburger-open.de/ , but that is not included as a test case since the format is likely to be temporary.

											
										
										
											10 years ago
+								                embedSWF\(?:\s*|
 								                new\s+SWFObject\(
-												[generic] Add support for <embed YouTube

											
										
										
											10 years ago
+								            )
 								            (["\'])
-												[generic/youtube] Recognize youtube nocookie embeds (Closes #3713)

											
										
										
											10 years ago
+								                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
-												[youtube] Support for embedded /p players (Fixes #3821)

											
										
										
											10 years ago
+								                (?:embed|v|p)/.+?)
-												[generic] Support YouTube swf embed (Fixes #2010)

											
										
										
											11 years ago
+								            \1''', webpage)
-												Support multiple embedded YouTube URLs (Fixes #1787)

											
										
										
											11 years ago
+								        if matches:
-												[generic] Simplify playlist support (#2948)

											
										
										
											10 years ago
+								            return _playlist_from_matches(
-												[generic] Allow embedded YoutubePlaylists (Fixes #3821)

											
										
										
											10 years ago
+								                matches, lambda m: unescapeHTML(m[1]))
-												Check for embedded YouTube player (Fixes #1616)

											
										
										
											11 years ago
-												[generic] Add support for LazyYT embeds (Fixes #4306)

											
										
										
											10 years ago
+								        # Look for lazyYT YouTube embed
 								        matches = re.findall(
 								            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
 								        if matches:
 								            return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
-												[generic] Find embedded dailymotion videos (Fixes #1848)

											
										
										
											11 years ago
+								        # Look for embedded Dailymotion player
 								        matches = re.findall(
-												[wistia] Add extractor

											
										
										
											11 years ago
+								            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
-												[generic] Find embedded dailymotion videos (Fixes #1848)

											
										
										
											11 years ago
+								        if matches:
-												[generic] Simplify playlist support (#2948)

											
										
										
											10 years ago
+								            return _playlist_from_matches(
 								                matches, lambda m: unescapeHTML(m[1]))
-												[generic] Find embedded dailymotion videos (Fixes #1848)

											
										
										
											11 years ago
-												[generic] Support embedded Dailymotion playlists (fixes #3822)

											
										
										
											10 years ago
+								        # Look for embedded Dailymotion playlist player (#3822)
 								        m = re.search(
 								            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
 								        if m:
 								            playlists = re.findall(
 								                r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
 								            if playlists:
 								                return _playlist_from_matches(
 								                    playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
-												[wistia] Add extractor

											
										
										
											11 years ago
+								        # Look for embedded Wistia player
 								        match = re.search(
-												[generic/wistia] Improve regex

											
										
										
											10 years ago
+								            r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
-												[wistia] Add extractor

											
										
										
											11 years ago
+								        if match:
-												[generic] Make sure Wistia embed URLs contain the protocol (Closes #3977)

Also, improve detection (Addresses #3662)

											
										
										
											10 years ago
+								            embed_url = self._proto_relative_url(
 								                unescapeHTML(match.group('url')))
-												[wistia] Add extractor

											
										
										
											11 years ago
+								            return {
 								                '_type': 'url_transparent',
-												[generic] Make sure Wistia embed URLs contain the protocol (Closes #3977)

Also, improve detection (Addresses #3662)

											
										
										
											10 years ago
+								                'url': embed_url,
-												[wistia] Add extractor

											
										
										
											11 years ago
+								                'ie_key': 'Wistia',
 								                'uploader': video_uploader,
 								                'title': video_title,
 								                'id': video_id,
 								            }
-												PEP8 applied

											
										
										
											10 years ago
-												[generic] Make sure Wistia embed URLs contain the protocol (Closes #3977)

Also, improve detection (Addresses #3662)

											
										
										
											10 years ago
+								        match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
-												[wistia] Use API and make more generic

											
										
										
											10 years ago
+								        if match:
 								            return {
 								                '_type': 'url_transparent',
 								                'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
 								                'ie_key': 'Wistia',
 								                'uploader': video_uploader,
 								                'title': video_title,
 								                'id': match.group('id')
 								            }
-												[wistia] Add extractor

											
										
										
											11 years ago
-												[GenericIE] Add support for embedded blip.tv

											
										
										
											11 years ago
+								        # Look for embedded blip.tv player
-												[Cinemassacre] Add detection for videos from blip.tv

											
										
										
											9 years ago
+								        bliptv_url = BlipTVIE._extract_url(webpage)
 								        if bliptv_url:
 								            return self.url_result(bliptv_url, 'BlipTV')
-												[GenericIE] Add support for embedded blip.tv

											
										
										
											11 years ago
-												[extractor/generic] Add support for svt embeds (Closes #5622)

											
										
										
											9 years ago
+								        # Look for SVT player
 								        svt_url = SVTIE._extract_url(webpage)
 								        if svt_url:
 								            return self.url_result(svt_url, 'SVT')
-												[condenast|generic] Add support for condenast embeds (Fixes #2783)

											
										
										
											10 years ago
+								        # Look for embedded condenast player
 								        matches = re.findall(
 								            r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
 								            webpage)
 								        if matches:
 								            return {
 								                '_type': 'playlist',
 								                'entries': [{
 								                    '_type': 'url',
 								                    'ie_key': 'CondeNast',
 								                    'url': ma,
 								                } for ma in matches],
 								                'title': video_title,
 								                'id': video_id,
 								            }
-												[generic] Detect bandcamp pages that use custom domains (closes #1662)

They embed the original url in the 'og:url' property.

											
										
										
											11 years ago
+								        # Look for Bandcamp pages with custom domain
 								        mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
 								        if mobj is not None:
 								            burl = unescapeHTML(mobj.group(1))
-												[bandcamp] add support for albums (reported in #1270)

											
										
										
											11 years ago
+								            # Don't set the extractor because it can be a track url or an album
 								            return self.url_result(burl)
-												[generic] Detect bandcamp pages that use custom domains (closes #1662)

They embed the original url in the 'og:url' property.

											
										
										
											11 years ago
-												Add support for embedded vevo player (Fixes #1957)

											
										
										
											11 years ago
+								        # Look for embedded Vevo player
 								        mobj = re.search(
 								            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'))
-												fixed viddler support - needed a Referer header; also added a viddler
generic extractor

											
										
										
											9 years ago
 								        # Look for embedded Viddler player
-												[generic] Improve some regexes

											
										
										
											9 years ago
+								        mobj = re.search(
 								            r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
 								            webpage)
-												fixed viddler support - needed a Referer header; also added a viddler
generic extractor

											
										
										
											9 years ago
+								        if mobj is not None:
 								            return self.url_result(mobj.group('url'))
-												Add support for embedded vevo player (Fixes #1957)

											
										
										
											11 years ago
-												[generic] Add support for nytimes embeds (Closes #5234)

											
										
										
											9 years ago
+								        # Look for NYTimes player
 								        mobj = re.search(
 								            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
 								            webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'))
-												[extractor/generic] Support Libsyn embeds

											
										
										
											9 years ago
+								        # Look for Libsyn player
 								        mobj = re.search(
 								            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'))
-												[generic] Detect ooyala videos (fixes #2013)

											
										
										
											11 years ago
+								        # Look for Ooyala videos
-												[generic] Improve some regexes

											
										
										
											9 years ago
+								        mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
-												[generic/ooyala] Add support for Ooyala embeds on SBN network websites (Fixes #4859)

											
										
										
											9 years ago
+								                re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
-												[generic] Support another type of Ooyala embedded video

											
										
										
											9 years ago
+								                re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
 								                re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
-												[generic] Detect ooyala videos (fixes #2013)

											
										
										
											11 years ago
+								        if mobj is not None:
-												[generic] Recognize more Ooyala embedded videos (#2569)

											
										
										
											10 years ago
+								            return OoyalaIE._build_url_result(mobj.group('ec'))
-												[generic] Detect ooyala videos (fixes #2013)

											
										
										
											11 years ago
-												[generic/ooyala] Add support for Ooyala embeds on SBN network websites (Fixes #4859)

											
										
										
											9 years ago
+								        # Look for multiple Ooyala embeds on SBN network websites
 								        mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
 								        if mobj is not None:
 								            embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
 								            if embeds:
 								                return _playlist_from_matches(
 								                    embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
-												[aparat] Add support (Fixes #2012)

											
										
										
											11 years ago
+								        # Look for Aparat videos
-												[generic] Be more relaxed when looking for aparat embeds (Fixes #2784)

											
										
										
											10 years ago
+								        mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
-												[aparat] Add support (Fixes #2012)

											
										
										
											11 years ago
+								        if mobj is not None:
 								            return self.url_result(mobj.group(1), 'Aparat')
-												[mpora] Add support (Fixes #2096)

											
										
										
											10 years ago
+								        # Look for MPORA videos
-												Improve some regexes for embedded players
											
										
										
											10 years ago
+								        mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
-												[mpora] Add support (Fixes #2096)

											
										
										
											10 years ago
+								        if mobj is not None:
 								            return self.url_result(mobj.group(1), 'Mpora')
-												[novamov] Remove superfluous tabs
											
										
										
											10 years ago
-												[generic] Generalize novamov based embeds

											
										
										
											10 years ago
+								        # Look for embedded NovaMov-based player
-												[novamov] Add embedded player support

											
										
										
											10 years ago
+								        mobj = re.search(
-												[generic] Support pagespeed_iframe for NovaMov embeds

											
										
										
											10 years ago
+								            r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
-												[generic] Generalize novamov based embeds

											
										
										
											10 years ago
+								                    (?P<url>http://(?:(?:embed|www)\.)?
 								                        (?:novamov\.com|
 								                           nowvideo\.(?:ch|sx|eu|at|ag|co)|
 								                           videoweed\.(?:es|com)|
 								                           movshare\.(?:net|sx|ag)|
 								                           divxstage\.(?:eu|net|ch|co|at|ag))
 								                        /embed\.php.+?)\1''', webpage)
-												[novamov] Add embedded player support

											
										
										
											10 years ago
+								        if mobj is not None:
-												[generic] Generalize novamov based embeds

											
										
										
											10 years ago
+								            return self.url_result(mobj.group('url'))
-												[generic] Add support for videoweed embeds

											
										
										
											10 years ago
-												[facebook] Add support for embeds

Example URL: http://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html

											
										
										
											10 years ago
+								        # Look for embedded Facebook player
 								        mobj = re.search(
-												[huffpost] Add support

											
										
										
											10 years ago
+								            r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
-												[facebook] Add support for embeds

Example URL: http://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html

											
										
										
											10 years ago
+								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'Facebook')
-												[vk] Add support for embedded videos (Closes #2473)
											
										
										
											10 years ago
+								        # Look for embedded VK player
 								        mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'VK')
-												[generic] Add support for ivi.ru embedded player

											
										
										
											10 years ago
+								        # Look for embedded ivi player
 								        mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'Ivi')
-												[huffpost] Add support

											
										
										
											10 years ago
+								        # Look for embedded Huffington Post player
 								        mobj = re.search(
-												Improve some regexes for embedded players
											
										
										
											10 years ago
+								            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
-												[huffpost] Add support

											
										
										
											10 years ago
+								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'HuffPost')
-												Add support for embed.ly

											
										
										
											10 years ago
+								        # Look for embed.ly
 								        mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'))
 								        mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
 								        if mobj is not None:
-												[extractor/generic] Use compat_urllib_parse_unquote

											
										
										
											9 years ago
+								            return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
-												Add support for embed.ly

											
										
										
											10 years ago
-												[generic/funnyordie] Add support for funnyordie embeds (Fixes #2546)

											
										
										
											10 years ago
+								        # Look for funnyordie embed
 								        matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
 								        if matches:
-												[generic] Simplify playlist support (#2948)

											
										
										
											10 years ago
+								            return _playlist_from_matches(
 								                matches, getter=unescapeHTML, ie='FunnyOrDie')
-												[generic/funnyordie] Add support for funnyordie embeds (Fixes #2546)

											
										
										
											10 years ago
-												[generic] Add support for BBC iPlayer embeds (Closes #4619)

											
										
										
											9 years ago
+								        # Look for BBC iPlayer embed
 								        matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
 								        if matches:
-												[generic] Generalize BBC iPlayer playlist extraction

											
										
										
											9 years ago
+								            return _playlist_from_matches(matches, ie='BBCCoUk')
-												[generic] Add support for BBC iPlayer embeds (Closes #4619)

											
										
										
											9 years ago
-												[generic] Add support for embedded rutv player

											
										
										
											10 years ago
+								        # Look for embedded RUTV player
 								        rutv_url = RUTVIE._extract_url(webpage)
 								        if rutv_url:
 								            return self.url_result(rutv_url, 'RUTV')
-												[extractor/generic] Add support for tvc embeds

											
										
										
											9 years ago
+								        # Look for embedded TVC player
-												[extractor/generic] Rename tvc embed url variable

											
										
										
											9 years ago
+								        tvc_url = TVCIE._extract_url(webpage)
 								        if tvc_url:
 								            return self.url_result(tvc_url, 'TVC')
-												[extractor/generic] Add support for tvc embeds

											
										
										
											9 years ago
-												[generic] Add support for sportbox embeds

											
										
										
											9 years ago
+								        # Look for embedded SportBox player
 								        sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
 								        if sportbox_urls:
 								            return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
-												[tumblr] Add support for pornhub embeds (Closes #5963)

											
										
										
											9 years ago
+								        # Look for embedded PornHub player
-												[extractor/generic] Add support for pornhub embeds

											
										
										
											9 years ago
+								        pornhub_url = PornHubIE._extract_url(webpage)
 								        if pornhub_url:
 								            return self.url_result(pornhub_url, 'PornHub')
-												[generic] Add support for xhamster embeds

											
										
										
											9 years ago
+								        # Look for embedded XHamster player
 								        xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
 								        if xhamster_urls:
 								            return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
-												[extractor/generic] Add support for tvigle embeds

											
										
										
											9 years ago
+								        # Look for embedded Tvigle player
 								        mobj = re.search(
 								            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'Tvigle')
-												[generic] Run TED detection before JW Player detection

Otherwise it overwrites the `mobj` variable.

											
										
										
											10 years ago
+								        # Look for embedded TED player
 								        mobj = re.search(
-												[generic] PEP8

											
										
										
											9 years ago
+								            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
-												[generic] Run TED detection before JW Player detection

Otherwise it overwrites the `mobj` variable.

											
										
										
											10 years ago
+								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'TED')
-												[UstreamIE] [generic] Added support for Ustream embed URLs (Fixes #2694)

											
										
										
											10 years ago
+								        # Look for embedded Ustream videos
 								        mobj = re.search(
 								            r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'Ustream')
-												[arte] Add support for embedded videos (Fixes #2620)

											
										
										
											10 years ago
+								        # Look for embedded arte.tv player
 								        mobj = re.search(
 								            r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
 								            webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'ArteTVEmbed')
-												[extractor/generic] Add support for francetv embeds

											
										
										
											9 years ago
+								        # Look for embedded francetv player
 								        mobj = re.search(
 								            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
 								            webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'))
-												[smotri] Modernize and add support for embedded videos (Closes #2585)

											
										
										
											10 years ago
+								        # Look for embedded smotri.com player
 								        smotri_url = SmotriIE._extract_url(webpage)
 								        if smotri_url:
 								            return self.url_result(smotri_url, 'Smotri')
-												[extractor/generic:myvi] Add support for myvi embeds

											
										
										
											9 years ago
+								        # Look for embedded Myvi.ru player
-												[myvi:embed] Rename to myvi

											
										
										
											9 years ago
+								        myvi_url = MyviIE._extract_url(webpage)
-												[extractor/generic:myvi] Add support for myvi embeds

											
										
										
											9 years ago
+								        if myvi_url:
 								            return self.url_result(myvi_url)
-												[soundcloud/generic] Add support for playlists

											
										
										
											10 years ago
+								        # Look for embedded soundcloud player
 								        mobj = re.search(
-												[generic] Allow soundcloud embeds with additional attributes

											
										
										
											10 years ago
+								            r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
-												[soundcloud/generic] Add support for playlists

											
										
										
											10 years ago
+								            webpage)
 								        if mobj is not None:
 								            url = unescapeHTML(mobj.group('url'))
 								            return self.url_result(url)
-												[Vulture] Add support for vulture.com

											
										
										
											10 years ago
+								        # Look for embedded vulture.com player
 								        mobj = re.search(
 								            r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
 								            webpage)
 								        if mobj is not None:
 								            url = unescapeHTML(mobj.group('url'))
 								            return self.url_result(url, ie='Vulture')
-												[generic] Extract mtvservices embedded videos

											
										
										
											10 years ago
+								        # Look for embedded mtvservices player
 								        mobj = re.search(
 								            r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
 								            webpage)
 								        if mobj is not None:
 								            url = unescapeHTML(mobj.group('url'))
 								            return self.url_result(url, ie='MTVServicesEmbedded')
-												[yahoo] Add support for embedded videos (Closes #3525)

											
										
										
											10 years ago
+								        # Look for embedded yahoo player
 								        mobj = re.search(
 								            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
 								            webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'Yahoo')
-												[sbs] Add new extractor (Fixes #3566)

											
										
										
											10 years ago
+								        # Look for embedded sbs.com.au player
 								        mobj = re.search(
-												[generic] Improve SBS detection (Fixes #4899)

											
										
										
											9 years ago
+								            r'''(?x)
 								            (?:
 								                <meta\s+property="og:video"\s+content=|
 								                <iframe[^>]+?src=
 								            )
 								            (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
-												[sbs] Add new extractor (Fixes #3566)

											
										
										
											10 years ago
+								            webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'SBS')
-												[cinchcast] Add new extractor (Fixes #4428)

											
										
										
											10 years ago
+								        # Look for embedded Cinchcast player
 								        mobj = re.search(
 								            r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
 								            webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'Cinchcast')
-												[mlb] Add support for embedded videos (Closes #3653)

											
										
										
											10 years ago
+								        mobj = re.search(
-												[generic] Improve MLB iframe regex

											
										
										
											10 years ago
+								            r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
-												[mlb] Add support for embedded videos (Closes #3653)

											
										
										
											10 years ago
+								            webpage)
-												[generic] Detect more MLB videos (fixes #5443)

											
										
										
											9 years ago
+								        if not mobj:
 								            mobj = re.search(
 								                r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
 								                webpage)
-												[mlb] Add support for embedded videos (Closes #3653)

											
										
										
											10 years ago
+								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'MLB')
-												[condenast] Add support for embedded videos (Closes #3929)

											
										
										
											10 years ago
+								        mobj = re.search(
 								            r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
 								            webpage)
 								        if mobj is not None:
 								            return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
-												[generic] Add support for livestream embeds (Fixes #4185)

											
										
										
											10 years ago
+								        mobj = re.search(
 								            r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
 								            webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'Livestream')
-												[generic] Add support for Zapiks embeds (#5014)

											
										
										
											9 years ago
+								        # Look for Zapiks embed
 								        mobj = re.search(
 								            r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'Zapiks')
-												[generic] Support dynamic Kaltura embeds (#5016) (#5073)

											
										
										
											9 years ago
+								        # Look for Kaltura embeds
-												[extractor/generic] Improve kaltura embeds support (Closes #6137)

											
										
										
											9 years ago
+								        mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
 								                re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
-												[generic] Support dynamic Kaltura embeds (#5016) (#5073)

											
										
										
											9 years ago
+								        if mobj is not None:
 								            return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
-												[eagleplatform] Add support for embeds

											
										
										
											9 years ago
+								        # Look for Eagle.Platform embeds
 								        mobj = re.search(
 								            r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'EaglePlatform')
-												[eagleplatform] Add support for ClipYou embeds

											
										
										
											9 years ago
+								        # Look for ClipYou (uses Eagle.Platform) embeds
 								        mobj = re.search(
 								            r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
 								        if mobj is not None:
 								            return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
-												[pladform] Add support for embeds

											
										
										
											9 years ago
+								        # Look for Pladform embeds
 								        mobj = re.search(
 								            r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'), 'Pladform')
-												[generic] Add support for playwire embeds (Closes #5430)

											
										
										
											9 years ago
+								        # Look for Playwire embeds
 								        mobj = re.search(
 								            r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'))
-												[generic] Add support for 5min embeds (#5310)

											
										
										
											9 years ago
+								        # Look for 5min embeds
 								        mobj = re.search(
 								            r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
 								        if mobj is not None:
 								            return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
-												[generic] Add support for Crooks and Liars embeds

											
										
										
											9 years ago
+								        # Look for Crooks and Liars embeds
 								        mobj = re.search(
 								            r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
 								        if mobj is not None:
 								            return self.url_result(mobj.group('url'))
-												[NBC/ThePlatform/Generic] Add a generic detector for NBCSportsVPlayer and enhance error detection in ThePlatformIE

											
										
										
											9 years ago
+								        # Look for NBC Sports VPlayer embeds
 								        nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
 								        if nbc_sports_url:
 								            return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
-												add google drive embeds
											
										
										
											9 years ago
+								        # Look for Google Drive embeds
 								        google_drive_url = GoogleDriveEmbedIE._extract_url(webpage)
 								        if google_drive_url:
 								            return self.url_result(google_drive_url, 'GoogleDrive')
-												[udn] Add new extractor

											
										
										
											9 years ago
+								        # Look for UDN embeds
 								        mobj = re.search(
 								            r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
 								        if mobj is not None:
 								            return self.url_result(
-												[utils] Remove url_infer_protocol

											
										
										
											9 years ago
+								                compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
-												[udn] Add new extractor

											
										
										
											9 years ago
-												[CSpan] Add detection for Senate ISVP. Closes #5302

											
										
										
											9 years ago
+								        # Look for Senate ISVP iframe
 								        senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
 								        if senate_isvp_url:
-												[generic] Fix typo

											
										
										
											9 years ago
+								            return self.url_result(senate_isvp_url, 'SenateISVP')
-												[CSpan] Add detection for Senate ISVP. Closes #5302

											
										
										
											9 years ago
-												[dailymotion/generic] Add DailymotionCloudIE

											
										
										
											9 years ago
+								        # Look for Dailymotion Cloud videos
 								        dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
 								        if dmcloud_url:
 								            return self.url_result(dmcloud_url, 'DailymotionCloud')
-												[extractor/generic] Add support for OnionStudios embeds (Closes #5841)

											
										
										
											9 years ago
+								        # Look for OnionStudios embeds
 								        onionstudios_url = OnionStudiosIE._extract_url(webpage)
 								        if onionstudios_url:
 								            return self.url_result(onionstudios_url)
-												[extractor/generic] Add support for snagfilms embeds

											
										
										
											9 years ago
+								        # Look for SnagFilms embeds
 								        snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
 								        if snagfilms_url:
 								            return self.url_result(snagfilms_url)
-												[generic/adobetv] Support AdobeTVVideo embeds (#6039)

											
										
										
											9 years ago
+								        # Look for AdobeTVVideo embeds
 								        mobj = re.search(
 								            r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
 								            webpage)
 								        if mobj is not None:
 								            return self.url_result(
 								                self._proto_relative_url(unescapeHTML(mobj.group(1))),
 								                'AdobeTVVideo')
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											10 years ago
+								        def check_video(vurl):
-												[generic] Add support for jwPlayer YouTube videos

This makes nationalarchives.gov.uk work (Fixes #4907, fixes #4876)

											
										
										
											9 years ago
+								            if YoutubeIE.suitable(vurl):
 								                return True
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											10 years ago
+								            vpath = compat_urlparse.urlparse(vurl).path
 								            vext = determine_ext(vpath)
 								            return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
 								        def filter_video(urls):
 								            """Return a list of the entries of ``urls`` accepted by ``check_video``, in order."""
 								            return [candidate for candidate in urls if check_video(candidate)]
-												Move GenericIE into its own file

											
										
										
											11 years ago
+								        # Start with something easy: JW Player in SWFObject
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											10 years ago
+								        found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											10 years ago
+								        if not found:
-												[generic] Support gorillavid.in

Previously, we were a little bit over-eager and got a random swf file.
Fixes #2084.

											
										
										
											10 years ago
+								            # Look for gorilla-vid style embedding
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											10 years ago
+								            found = filter_video(re.findall(r'''(?sx)
-												[generic] Improve jwplayer detection (Fixes #2731)

											
										
										
											10 years ago
+								                (?:
 								                    jw_plugins|
 								                    JWPlayerOptions|
 								                    jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
 								                )
-												[generic] Add support for jwPlayer YouTube videos

This makes nationalarchives.gov.uk work (Fixes #4907, fixes #4876)

											
										
										
											9 years ago
+								                .*?
 								                ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											10 years ago
+								        if not found:
-												Move GenericIE into its own file

											
										
										
											11 years ago
+								            # Broaden the search a little bit
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											10 years ago
+								            found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											10 years ago
+								        if not found:
 								            # Broaden the findall a little bit: JWPlayer JS loader
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											10 years ago
+								            found = filter_video(re.findall(
 								                r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
-												[generic] Automatic detection of flow player and age_limit (Fixes #3576)

											
										
										
											10 years ago
+								        if not found:
 								            # Flow player
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											10 years ago
+								            found = filter_video(re.findall(r'''(?xs)
-												[generic] Automatic detection of flow player and age_limit (Fixes #3576)

											
										
										
											10 years ago
+								                flowplayer\("[^"]+",\s*
 								                    \{[^}]+?\}\s*,
-												The opening curly brace `{` is a regex reserved [control character](http://stackoverflow.com/a/400316/1106367), so it needs to be escaped.

											
										
										
											9 years ago
+								                    \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
-												[generic] Automatic detection of flow player and age_limit (Fixes #3576)

											
										
										
											10 years ago
+								                        ["']?url["']?\s*:\s*["']([^"']+)["']
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											10 years ago
+								            ''', webpage))
-												[generic] Add support for Cinerama player (Fixes #4752)

											
										
										
											9 years ago
+								        if not found:
 								            # Cinerama player
 								            found = re.findall(
 								                r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											10 years ago
+								        if not found:
-												Move GenericIE into its own file

											
										
										
											11 years ago
+								            # Try to find twitter cards info
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											10 years ago
+								            found = filter_video(re.findall(
 								                r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											10 years ago
+								        if not found:
-												Move GenericIE into its own file

											
										
										
											11 years ago
+								            # We look for Open Graph info:
 								            # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											10 years ago
+								            m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
-												Move GenericIE into its own file

											
										
										
											11 years ago
+								            # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
 								            if m_video_type is not None:
-												[generic] Ignore some non-video file extensions during generic extraction (Closes #3900)

											
										
										
											10 years ago
+								                found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											10 years ago
+								        if not found:
-												[generic] support HTML5 video

											
										
										
											11 years ago
+								            # HTML5 video
-												[generic] Add support for single quotes in HTML5 videos (Fixes #4265)

											
										
										
											10 years ago
+								            found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											10 years ago
+								        if not found:
-												[generic] Generalize redirect regex

											
										
										
											9 years ago
+								            REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
-												[generic] Fix redirect

											
										
										
											10 years ago
+								            found = re.search(
-												[generic] Add support for <meta redirect>

Fixes #413

											
										
										
											10 years ago
+								                r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
-												[generic] Generalize redirect regex

											
										
										
											9 years ago
+								                r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
-												[generic] Add support for <meta redirect>

Fixes #413

											
										
										
											10 years ago
+								                webpage)
-												[generic] Follow redirects specified by `Refresh` HTTP header

											
										
										
											9 years ago
+								            if not found:
 								                # Look also in Refresh HTTP header
 								                refresh_header = head_response.headers.get('Refresh')
 								                if refresh_header:
-												[generic] Generalize redirect regex

											
										
										
											9 years ago
+								                    found = re.search(REDIRECT_REGEX, refresh_header)
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											10 years ago
+								            if found:
-												[generic] Unescape HTML escape sequences in redirect urls (fixes #6311)

											
										
										
											9 years ago
+								                new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
-												[generic] Add support for <meta redirect>

Fixes #413

											
										
										
											10 years ago
+								                self.report_following_redirect(new_url)
 								                return {
 								                    '_type': 'url',
 								                    'url': new_url,
 								                }
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											10 years ago
+								        if not found:
-												Add documentation about supported sites (Fixes #4503)

											
										
										
											9 years ago
+								            raise UnsupportedError(url)
-												Move GenericIE into its own file

											
										
										
											11 years ago
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											10 years ago
+								        entries = []
 								        for video_url in found:
 								            video_url = compat_urlparse.urljoin(url, video_url)
-												[extractor/generic] Use compat_urllib_parse_unquote

											
										
										
											9 years ago
+								            video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
-												Move GenericIE into its own file

											
										
										
											11 years ago
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											10 years ago
+								            # Sometimes, jwplayer extraction will result in a YouTube URL
 								            if YoutubeIE.suitable(video_url):
 								                entries.append(self.url_result(video_url, 'Youtube'))
 								                continue
-												Move GenericIE into its own file

											
										
										
											11 years ago
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											10 years ago
+								            # here's a fun little line of code for you:
 								            video_id = os.path.splitext(video_id)[0]
-												[youtube] Support jwplayer with YouTube URLs (Closes #2075)

											
										
										
											10 years ago
-												[generic] Extract videos from SMIL manifests (closes #5145 and fixes #5135)

											
										
										
											9 years ago
+								            if determine_ext(video_url) == 'smil':
 								                entries.append({
 								                    'id': video_id,
 								                    'formats': self._extract_smil_formats(video_url, video_id),
 								                    'uploader': video_uploader,
 								                    'title': video_title,
 								                    'age_limit': age_limit,
 								                })
 								            else:
 								                entries.append({
 								                    'id': video_id,
 								                    'url': video_url,
 								                    'uploader': video_uploader,
 								                    'title': video_title,
 								                    'age_limit': age_limit,
 								                })
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											10 years ago
 								        if len(entries) == 1:
-												[generic] Fix wrong entries index

											
										
										
											10 years ago
+								            return entries[0]
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											10 years ago
+								        else:
 								            for num, e in enumerate(entries, start=1):
-												[generic] Don't set the 'title' if it's not defined in the entry (closes #5061)

Some of them may be an 'url' result, which in general don't have the 'title' field.

											
										
										
											9 years ago
+								                # 'url' results don't have a title
 								                if e.get('title') is not None:
 								                    e['title'] = '%s (%d)' % (e['title'], num)
-												[generic] Allow multiple matches for generic hits (Fixes #2818)

											
										
										
											10 years ago
+								            return {
 								                '_type': 'playlist',
 								                'entries': entries,
 								            }