From 92dc0227f9b71701d7906802655ab223c818e8c3 Mon Sep 17 00:00:00 2001 From: Hannu Hartikainen Date: Mon, 12 Oct 2020 14:25:09 +0300 Subject: [PATCH 1/3] [twentythreevideo] support subdomain urls --- youtube_dl/extractor/twentythreevideo.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/twentythreevideo.py b/youtube_dl/extractor/twentythreevideo.py index aa0c6e90f..a9c4f69e4 100644 --- a/youtube_dl/extractor/twentythreevideo.py +++ b/youtube_dl/extractor/twentythreevideo.py @@ -8,8 +8,8 @@ from ..utils import int_or_none class TwentyThreeVideoIE(InfoExtractor): IE_NAME = '23video' - _VALID_URL = r'https?://video\.(?P<domain>twentythree\.net|23video\.com|filmweb\.no)/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)' - _TEST = { + _VALID_URL = r'https?://(?P<domain>[^.]+\.(twentythree\.net|23video\.com|filmweb\.no))/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)' + _TESTS = [{ 'url': 'https://video.twentythree.net/v.ihtml/player.html?showDescriptions=0&source=site&photo%5fid=20448876&autoPlay=1', 'md5': '75fcf216303eb1dae9920d651f85ced4', 'info_dict': { @@ -21,11 +21,23 @@ class TwentyThreeVideoIE(InfoExtractor): 'uploader_id': '12258964', 'uploader': 'Rasmus Bysted', } - } + }, { + 'url': 'https://bonnier-publications-danmark.23video.com/v.ihtml/player.html?token=f0dc46476e06e13afd5a1f84a29e31e8&source=embed&photo%5fid=36137620', + 'md5': '772a91f83d129ee5f015b12bea61a78b', + 'info_dict': { + 'id': '36137620', + 'ext': 'mp4', + 'upload_date': '20181004', + 'uploader': 'Kristoffer Engbo', + 'title': 'Photoshop Elements 2019 - Photo Text', + 'uploader_id': '10801356', + 'timestamp': 1538664032, + } + }] def _real_extract(self, url): - domain, query, photo_id = re.match(self._VALID_URL, url).groups() - base_url = 'https://video.%s' % domain + domain, _, query, photo_id = re.match(self._VALID_URL, url).groups() + base_url = 'https://%s' % domain photo_data = self._download_json( base_url + 
'/api/photo/list?' + query, photo_id, query={ 'format': 'json', From 7cda72a6f65aa538aa213fcffcfd24076ad73cc4 Mon Sep 17 00:00:00 2001 From: Hannu Hartikainen Date: Mon, 19 Oct 2020 17:30:04 +0300 Subject: [PATCH 2/3] twentythreevideo: fix review comments --- youtube_dl/extractor/twentythreevideo.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/twentythreevideo.py b/youtube_dl/extractor/twentythreevideo.py index a9c4f69e4..d8b5e54dc 100644 --- a/youtube_dl/extractor/twentythreevideo.py +++ b/youtube_dl/extractor/twentythreevideo.py @@ -8,7 +8,7 @@ from ..utils import int_or_none class TwentyThreeVideoIE(InfoExtractor): IE_NAME = '23video' - _VALID_URL = r'https?://(?P<domain>[^.]+\.(twentythree\.net|23video\.com|filmweb\.no))/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)' + _VALID_URL = r'https?://(?P<domain>[^.]+\.twentythree\.net|[^.]+\.23video\.com|[^.]+\.filmweb\.no)/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)' _TESTS = [{ 'url': 'https://video.twentythree.net/v.ihtml/player.html?showDescriptions=0&source=site&photo%5fid=20448876&autoPlay=1', 'md5': '75fcf216303eb1dae9920d651f85ced4', @@ -23,20 +23,11 @@ class TwentyThreeVideoIE(InfoExtractor): } }, { 'url': 'https://bonnier-publications-danmark.23video.com/v.ihtml/player.html?token=f0dc46476e06e13afd5a1f84a29e31e8&source=embed&photo%5fid=36137620', - 'md5': '772a91f83d129ee5f015b12bea61a78b', - 'info_dict': { - 'id': '36137620', - 'ext': 'mp4', - 'upload_date': '20181004', - 'uploader': 'Kristoffer Engbo', - 'title': 'Photoshop Elements 2019 - Photo Text', - 'uploader_id': '10801356', - 'timestamp': 1538664032, - } + 'only_matching': True, }] def _real_extract(self, url): - domain, _, query, photo_id = re.match(self._VALID_URL, url).groups() + domain, query, photo_id = re.match(self._VALID_URL, url).groups() base_url = 'https://%s' % domain photo_data = self._download_json( base_url + '/api/photo/list?' 
+ query, photo_id, query={ From 83d11dd72a672e85fa05fcbfee62cd8c5bb9210f Mon Sep 17 00:00:00 2001 From: Hannu Hartikainen Date: Mon, 19 Oct 2020 17:38:57 +0300 Subject: [PATCH 3/3] twentythreevideo: use a non-capturing group regex --- youtube_dl/extractor/twentythreevideo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentythreevideo.py b/youtube_dl/extractor/twentythreevideo.py index d8b5e54dc..dc5609192 100644 --- a/youtube_dl/extractor/twentythreevideo.py +++ b/youtube_dl/extractor/twentythreevideo.py @@ -8,7 +8,7 @@ from ..utils import int_or_none class TwentyThreeVideoIE(InfoExtractor): IE_NAME = '23video' - _VALID_URL = r'https?://(?P<domain>[^.]+\.twentythree\.net|[^.]+\.23video\.com|[^.]+\.filmweb\.no)/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)' + _VALID_URL = r'https?://(?P<domain>[^.]+\.(?:twentythree\.net|23video\.com|filmweb\.no))/v\.ihtml/player\.html\?(?P<query>.*?\bphoto(?:_|%5f)id=(?P<id>\d+).*)' _TESTS = [{ 'url': 'https://video.twentythree.net/v.ihtml/player.html?showDescriptions=0&source=site&photo%5fid=20448876&autoPlay=1', 'md5': '75fcf216303eb1dae9920d651f85ced4',