From 3a1ead0f6a64269862613b1157b6640617c8e9dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexandre-Xavier=20Labont=C3=A9-Lamoureux?= Date: Sat, 3 Oct 2020 18:12:20 -0400 Subject: [PATCH] [Playvids] Add new extractor --- docs/supportedsites.md | 1 + youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/playvids.py | 50 ++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+) create mode 100644 youtube_dl/extractor/playvids.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 367545a96..891b43815 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -666,6 +666,7 @@ - **PlaysTV** - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz - **Playvid** + - **Playvids** - **Playwire** - **pluralsight** - **pluralsight:course** diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ae7079a6a..b315c74ca 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -852,6 +852,7 @@ from .playplustv import PlayPlusTVIE from .plays import PlaysTVIE from .playtvak import PlaytvakIE from .playvid import PlayvidIE +from .playvids import PlayvidsIE from .playwire import PlaywireIE from .pluralsight import ( PluralsightIE, diff --git a/youtube_dl/extractor/playvids.py b/youtube_dl/extractor/playvids.py new file mode 100644 index 000000000..d1030a51e --- /dev/null +++ b/youtube_dl/extractor/playvids.py @@ -0,0 +1,50 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class PlayvidsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?playvids\.com/(?P.+?)/(?P.+?)(?:$|[#\?])' + _TEST = { + 'url': 'https://www.playvids.com/bKmGLe3IwjZ/sv/brazzers-800-phone-sex-madison-ivy-always-on-the-line', + 'md5': '3b57615c81d5580919d3a0b216056a15', + 'info_dict': { + 'id': 'bKmGLe3IwjZ', + 'ext': 'mp4', + 'title': 'Brazzers - 1 800 Phone Sex: Madison Ivy Always On The Line 6', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'

(.+?)

', webpage, 'title').strip() + + # search for the video urls + video_tags = re.findall(r'data-hls-src[0-9]*?="https:\/\/.*?userscontent.net.*?\.mp4\/index.m3u8\?seclink=.*?sectime=[0-9]*"', webpage) + + # get the url from each match + video_urls = [] + for n in video_tags: + video_urls.append(self._html_search_regex(r'"(.*?)"', n, 'url').replace("&", "&")) + + # reverse list so the best format is first + video_urls.reverse() + + # check if nothing was found before attempting anything + if len(video_urls) == 0: + raise ExtractorError('No video URLs found') + else: + return { + 'id': video_id, + 'title': title, + 'url': video_urls[0], + 'ext': 'mp4', + 'age_limit': 18, + }