From 3e5b6a254234fafa2bf52900bb63e63ce74acf05 Mon Sep 17 00:00:00 2001 From: MaximZ21 <razel.agressor.21@gmail.com> Date: Thu, 12 Mar 2020 09:33:13 +0600 Subject: [PATCH 1/3] [Vlaretv] Add new extractor --- youtube_dl/extractor/extractors.py | 5 ++ youtube_dl/extractor/vlaretv.py | 87 ++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 youtube_dl/extractor/vlaretv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 64d1fa251..fb2672543 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1341,6 +1341,11 @@ from .vk import ( VKUserVideosIE, VKWallPostIE, ) +from .vlaretv import ( + VlaretvIE, + VlaretvPlaylistIE +) + from .vlive import ( VLiveIE, VLiveChannelIE, diff --git a/youtube_dl/extractor/vlaretv.py b/youtube_dl/extractor/vlaretv.py new file mode 100644 index 000000000..a118120e7 --- /dev/null +++ b/youtube_dl/extractor/vlaretv.py @@ -0,0 +1,87 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +import re + + +class VlaretvIE(InfoExtractor): + _VALID_URL = r'https?:\/\/vlare.tv\/v\/(?P<id>[0-9a-zA-Z]+)' + IE_NAME = 'vlare.tv' + _TESTS = [ + { + 'url': 'https://vlare.tv/v/cTQKAh0z', + 'info_dict': { + 'id': 'cTQKAh0z', + 'ext': 'mp4', + 'title': 'Interspecies Reviewers Abridged | One Shot (Parody)', + } + }, + { + 'url': 'https://vlare.tv/v/HSzfUoye', + 'info_dict': { + 'id': 'HSzfUoye', + 'ext': 'mp4', + 'title': 'Quake II (1997) - Gameplay AMD K6-III+ and 3dfx Voodoo Banshee', + } + }, + { + 'url': 'https://vlare.tv/v/t7XSuZfK/2568', + 'info_dict': { + 'id': 'HSzfUoye', + 'ext': 'mp4', + 'title': 'Quake II (1997) - Gameplay AMD K6-III+ and 3dfx Voodoo Banshee', + } + } + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'<title>(.+?)<\/title>', webpage, 'title').replace(' | Vlare', '') + video_urls = self._html_search_regex(r'sources: 
\[{"file":(.+?)\],', webpage, 'video_urls') + video_urls = video_urls.split(',') + video_urls_clean = [] + for i in video_urls: + if 'http' in i: + video_urls_clean.insert(0, {'url': i.replace("\"", "").replace("\n", "").replace("{file:", "")}) + return { + 'id': video_id, + 'title': title, + 'formats': video_urls_clean + } + + +class VlaretvPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://vlare.tv/u/(?P<Channel_id>[0-9a-zA-Z]+)/playlist/(?P<id>[0-9]+)' + IE_NAME = 'Vlare.tv Playlist' + _TEST = { + 'url': 'https://vlare.tv/u/LVWDDFhi/playlist/2568', + 'info_dict': { + 'id': '2568', + 'title': 'LHA', + }, + 'playlist_count': 11, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + urls = re.findall(r'<a href="(.+?)" class="video_thumbnail"', webpage) + title = self._html_search_regex(r'<title>(.+?)<\/title>', webpage, 'title').split('|')[1][1:-1] + entries = [] + for i in urls: + entry = { + '_type': 'url_transparent', + 'url': 'https://vlare.tv' + i, + 'id': re.match(r'\/v\/(.+?)\/', i), + } + entries.append(entry) + + return { + '_type': 'playlist', + 'title': title, + 'id': self._match_id(url), + 'entries': entries, + } From 27ef02fa83ce03505016e0a261d2e1d1288f9f53 Mon Sep 17 00:00:00 2001 From: MaximZ21 <razel.agressor.21@gmail.com> Date: Wed, 8 Apr 2020 15:43:47 +0600 Subject: [PATCH 2/3] Made requested changes --- youtube_dl/extractor/extractors.py | 6 +-- youtube_dl/extractor/vlaretv.py | 73 +++--------------------------- 2 files changed, 8 insertions(+), 71 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index fb2672543..61f3b1ebb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1341,11 +1341,7 @@ from .vk import ( VKUserVideosIE, VKWallPostIE, ) -from .vlaretv import ( - VlaretvIE, - VlaretvPlaylistIE -) - +from .vlaretv import VlaretvPlaylistIE from .vlive import ( VLiveIE, 
VLiveChannelIE, diff --git a/youtube_dl/extractor/vlaretv.py b/youtube_dl/extractor/vlaretv.py index a118120e7..900260ece 100644 --- a/youtube_dl/extractor/vlaretv.py +++ b/youtube_dl/extractor/vlaretv.py @@ -1,60 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals - from .common import InfoExtractor +from ..utils import urljoin import re -class VlaretvIE(InfoExtractor): - _VALID_URL = r'https?:\/\/vlare.tv\/v\/(?P<id>[0-9a-zA-Z]+)' - IE_NAME = 'vlare.tv' - _TESTS = [ - { - 'url': 'https://vlare.tv/v/cTQKAh0z', - 'info_dict': { - 'id': 'cTQKAh0z', - 'ext': 'mp4', - 'title': 'Interspecies Reviewers Abridged | One Shot (Parody)', - } - }, - { - 'url': 'https://vlare.tv/v/HSzfUoye', - 'info_dict': { - 'id': 'HSzfUoye', - 'ext': 'mp4', - 'title': 'Quake II (1997) - Gameplay AMD K6-III+ and 3dfx Voodoo Banshee', - } - }, - { - 'url': 'https://vlare.tv/v/t7XSuZfK/2568', - 'info_dict': { - 'id': 'HSzfUoye', - 'ext': 'mp4', - 'title': 'Quake II (1997) - Gameplay AMD K6-III+ and 3dfx Voodoo Banshee', - } - } - ] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<title>(.+?)<\/title>', webpage, 'title').replace(' | Vlare', '') - video_urls = self._html_search_regex(r'sources: \[{"file":(.+?)\],', webpage, 'video_urls') - video_urls = video_urls.split(',') - video_urls_clean = [] - for i in video_urls: - if 'http' in i: - video_urls_clean.insert(0, {'url': i.replace("\"", "").replace("\n", "").replace("{file:", "")}) - return { - 'id': video_id, - 'title': title, - 'formats': video_urls_clean - } - - class VlaretvPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://vlare.tv/u/(?P<Channel_id>[0-9a-zA-Z]+)/playlist/(?P<id>[0-9]+)' - IE_NAME = 'Vlare.tv Playlist' + _VALID_URL = r'https?://vlare\.tv/u/(?P<Channel_id>[0-9a-zA-Z]+)/playlist/(?P<id>[0-9]+)' _TEST = { 'url': 'https://vlare.tv/u/LVWDDFhi/playlist/2568', 'info_dict': { @@ -66,22 +18,11 @@ class 
VlaretvPlaylistIE(InfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) urls = re.findall(r'<a href="(.+?)" class="video_thumbnail"', webpage) - title = self._html_search_regex(r'<title>(.+?)<\/title>', webpage, 'title').split('|')[1][1:-1] - entries = [] - for i in urls: - entry = { - '_type': 'url_transparent', - 'url': 'https://vlare.tv' + i, - 'id': re.match(r'\/v\/(.+?)\/', i), - } - entries.append(entry) + title = self._html_search_regex(r'<title>(.+?) \| Vlare', webpage, 'title') + + # When playlist points to deleted video there is an "error" in the url (Ex. https://vlare.tv/v/error/3257) + entries = [self.url_result(urljoin('https://vlare.tv', u)) for u in urls if 'error' not in u] - return { - '_type': 'playlist', - 'title': title, - 'id': self._match_id(url), - 'entries': entries, - } + return self.playlist_result(entries, playlist_id, title) From 1a44fd01069d9a8b540de0284656f02c770c4522 Mon Sep 17 00:00:00 2001 From: MaximZ21 <razel.agressor.21@gmail.com> Date: Wed, 8 Apr 2020 17:36:01 +0600 Subject: [PATCH 3/3] Minor change to satisfy flake8 --- youtube_dl/extractor/vlaretv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vlaretv.py b/youtube_dl/extractor/vlaretv.py index 900260ece..264e19518 100644 --- a/youtube_dl/extractor/vlaretv.py +++ b/youtube_dl/extractor/vlaretv.py @@ -21,7 +21,7 @@ class VlaretvPlaylistIE(InfoExtractor): webpage = self._download_webpage(url, playlist_id) urls = re.findall(r'<a href="(.+?)" class="video_thumbnail"', webpage) title = self._html_search_regex(r'<title>(.+?) \| Vlare', webpage, 'title') - + # When playlist points to deleted video there is an "error" in the url (Ex. https://vlare.tv/v/error/3257) entries = [self.url_result(urljoin('https://vlare.tv', u)) for u in urls if 'error' not in u]