From b2df82d8f78b8a973d0b8d6cbc4407c8255fe570 Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Tue, 7 Jan 2020 18:34:51 +1100 Subject: [PATCH 01/28] [PeerTube] Replace broken test video --- youtube_dl/extractor/peertube.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index d3a83ea2b..859ed5368 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -423,26 +423,26 @@ class PeerTubeIE(InfoExtractor): (?P%s) ''' % (_INSTANCES_RE, _UUID_RE) _TESTS = [{ - 'url': 'https://peertube.cpy.re/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c', - 'md5': '80f24ff364cc9d333529506a263e7feb', + 'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d', + 'md5': '9618b916a40563adfd9b04f3ec12e79b', 'info_dict': { - 'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c', + 'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d', 'ext': 'mp4', - 'title': 'wow', - 'description': 'wow such video, so gif', + 'title': 'What is PeerTube?', + 'description': '**[Want to help to translate this video?](https://weblate.framasoft.org/projects/what-is-peertube-video/)**\r\n\r\n**Take back the control of your videos! [#JoinPeertube](https://joinpeertube.org)**\r\n*A decentralized video hosting network, based on free/libre software!*\r\n\r\n**Animation Produced by:** [LILA](https://libreart.info) - [ZeMarmot Team](https://film.zemarmot.net)\r\n*Directed by* Aryeom\r\n*Assistant* Jehan\r\n**Licence**: [CC-By-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/)\r\n\r\n**Sponsored by** [Framasoft](https://framasoft.org)\r\n\r\n**Music**: [Red Step Forward](http://play.dogmazic.net/song.php?song_id=52491) - CC-By Ken Bushima\r\n\r\n**Movie Clip**: [Caminades 3: Llamigos](http://www.caminandes.com/) CC-By Blender Institute\r\n\r\n**Video sources**: https://gitlab.gnome.org/Jehan/what-is-peertube/', 'thumbnail': r're:https?://.*\.(?:jpg|png)', - 'timestamp': 1519297480, - 'upload_date': '20180222', - 'uploader': 'Luclu7', - 'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1', - 'uploder_url': 'https://peertube.nsa.ovh/accounts/luclu7', - 'license': 'Unknown', - 'duration': 3, + 'timestamp': 1538391166, + 'upload_date': '20181001', + 'uploader': 'Framasoft', + 'uploader_id': 'framasoft@framatube.org', + 'uploder_url': 'https://framatube.org/accounts/framasoft', + 'license': 'Attribution - Share Alike', + 'duration': 113, 'view_count': int, 'like_count': int, 'dislike_count': int, - 'tags': list, - 'categories': list, + 'tags': ["framasoft","peertube"], + 'categories': ["Science & Technology"], } }, { 'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44', From 69c96a3e045cd6b40aceff38fa78ebc0f3765c9f Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Tue, 7 Jan 2020 18:35:53 +1100 Subject: [PATCH 02/28] [PeerTube] Fix uploader_id --- youtube_dl/extractor/peertube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 859ed5368..e66faafa8 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -532,7 +532,7 @@ class PeerTubeIE(InfoExtractor): 'thumbnail': urljoin(url, video.get('thumbnailPath')), 'timestamp': unified_timestamp(video.get('publishedAt')), 'uploader': account_data('displayName'), - 'uploader_id': account_data('uuid'), + 'uploader_id': '%s@%s' % (account_data('name'), account_data('host')), 'uploder_url': account_data('url'), 'license': try_get( video, lambda x: x['licence']['label'], compat_str), From f271b7997f60ff203ded05fea9f87754d23cafc8 Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Tue, 7 Jan 2020 18:37:00 +1100 Subject: [PATCH 03/28] [PeerTube] Fix typo: uploader_url --- youtube_dl/extractor/peertube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index e66faafa8..de7f04ee1 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -435,7 +435,7 @@ class PeerTubeIE(InfoExtractor): 'upload_date': '20181001', 'uploader': 'Framasoft', 'uploader_id': 'framasoft@framatube.org', - 'uploder_url': 'https://framatube.org/accounts/framasoft', + 'uploader_url': 'https://framatube.org/accounts/framasoft', 'license': 'Attribution - Share Alike', 'duration': 113, 'view_count': int, @@ -533,7 +533,7 @@ class PeerTubeIE(InfoExtractor): 'timestamp': unified_timestamp(video.get('publishedAt')), 'uploader': account_data('displayName'), 'uploader_id': '%s@%s' % (account_data('name'), account_data('host')), - 'uploder_url': account_data('url'), + 'uploader_url': account_data('url'), 'license': try_get( video, lambda x: x['licence']['label'], compat_str), 'duration': int_or_none(video.get('duration')), From a4d319699655a5110c1eaeef7a635cc213b201a9 Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Tue, 7 Jan 2020 18:37:57 +1100 Subject: [PATCH 04/28] [PeerTube] Add language field --- youtube_dl/extractor/peertube.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index de7f04ee1..769e4b9af 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -436,6 +436,7 @@ class PeerTubeIE(InfoExtractor): 'uploader': 'Framasoft', 'uploader_id': 'framasoft@framatube.org', 'uploader_url': 'https://framatube.org/accounts/framasoft', + 'language': 'en', 'license': 'Attribution - Share Alike', 'duration': 113, 'view_count': int, @@ -534,6 +535,8 @@ class PeerTubeIE(InfoExtractor): 'uploader': account_data('displayName'), 'uploader_id': '%s@%s' % (account_data('name'), account_data('host')), 'uploader_url': account_data('url'), + 'language': try_get( + video, lambda x: x['language']['id'], compat_str), 'license': try_get( video, lambda x: x['licence']['label'], compat_str), 'duration': int_or_none(video.get('duration')), From 0701dd15e0e1e9c05d1b38a0030daf0db4fa6ebf Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Tue, 7 Jan 2020 18:59:30 +1100 Subject: [PATCH 05/28] [PeerTube] Get full description instead of truncated description --- youtube_dl/extractor/peertube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 769e4b9af..c16db51fd 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -493,6 +493,9 @@ class PeerTubeIE(InfoExtractor): video = self._download_json( 'https://%s/api/v1/videos/%s' % (host, video_id), video_id) + video_description = self._download_json( + 'https://%s/api/v1/videos/%s/description' % (host, video_id), video_id) + title = video['name'] formats = [] @@ -529,7 +532,7 @@ class PeerTubeIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'description': video.get('description'), + 'description': video_description.get('description'), 'thumbnail': urljoin(url, video.get('thumbnailPath')), 'timestamp': unified_timestamp(video.get('publishedAt')), 'uploader': account_data('displayName'), From 5a449a1b1d88166d78ee26458ba5e2c6f63fbe82 Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Tue, 7 Jan 2020 19:01:05 +1100 Subject: [PATCH 06/28] [PeerTube] Add subtitles --- youtube_dl/extractor/peertube.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index c16db51fd..179fdef00 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -496,6 +496,9 @@ class PeerTubeIE(InfoExtractor): video_description = self._download_json( 'https://%s/api/v1/videos/%s/description' % (host, video_id), video_id) + video_captions = self._download_json( + 'https://%s/api/v1/videos/%s/captions' % (host, video_id), video_id) + title = video['name'] formats = [] @@ -517,6 +520,13 @@ class PeerTubeIE(InfoExtractor): formats.append(f) self._sort_formats(formats) + subtitles = {} + for entry in video_captions['data']: + caption_url = 'https://%s%s' % (host, entry['captionPath']) + subtitles[entry['language']['id']] = [{ + 'url': caption_url + }] + def account_data(field): return try_get(video, lambda x: x['account'][field], compat_str) @@ -550,4 +560,5 @@ class PeerTubeIE(InfoExtractor): 'tags': try_get(video, lambda x: x['tags'], list), 'categories': categories, 'formats': formats, + 'subtitles': subtitles } From d0a86c02cc3a0e677b5bb6c04490ca78858b644e Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Tue, 7 Jan 2020 19:09:58 +1100 Subject: [PATCH 07/28] [PeerTube] Add whitespace for flake8 --- youtube_dl/extractor/peertube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 179fdef00..252295f97 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -442,7 +442,7 @@ class PeerTubeIE(InfoExtractor): 'view_count': int, 'like_count': int, 'dislike_count': int, - 'tags': ["framasoft","peertube"], + 'tags': ["framasoft", "peertube"], 'categories': ["Science & Technology"], } }, { From 3e6e70932a1df5b6175030ef0ded39cc66ed1421 Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Wed, 8 Jan 2020 11:31:46 +1100 Subject: [PATCH 08/28] [PeerTube] Make video_description download non-fatal --- youtube_dl/extractor/peertube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 252295f97..728b0acc4 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -494,7 +494,7 @@ class PeerTubeIE(InfoExtractor): 'https://%s/api/v1/videos/%s' % (host, video_id), video_id) video_description = self._download_json( - 'https://%s/api/v1/videos/%s/description' % (host, video_id), video_id) + 'https://%s/api/v1/videos/%s/description' % (host, video_id), video_id, fatal=False) video_captions = self._download_json( 'https://%s/api/v1/videos/%s/captions' % (host, video_id), video_id) From 2c2c219911657a4d9133179fb5ab0eb2d7ac8ccf Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Wed, 8 Jan 2020 11:34:41 +1100 Subject: [PATCH 09/28] [PeerTube] Only download subtitles if requested --- youtube_dl/extractor/peertube.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 728b0acc4..d1183cee6 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -485,6 +485,18 @@ class PeerTubeIE(InfoExtractor): entries = [peertube_url] return entries + def _get_subtitles(self, host, video_id): + video_captions = self._download_json( + 'https://%s/api/v1/videos/%s/captions' % (host, video_id), video_id) + + subtitles = {} + for entry in video_captions['data']: + caption_url = 'https://%s%s' % (host, entry['captionPath']) + subtitles[entry['language']['id']] = [{ + 'url': caption_url + }] + return subtitles + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) host = mobj.group('host') or mobj.group('host_2') @@ -496,9 +508,6 @@ class PeerTubeIE(InfoExtractor): video_description = self._download_json( 'https://%s/api/v1/videos/%s/description' % (host, video_id), video_id, fatal=False) - video_captions = self._download_json( - 'https://%s/api/v1/videos/%s/captions' % (host, video_id), video_id) - title = video['name'] formats = [] @@ -520,12 +529,7 @@ class PeerTubeIE(InfoExtractor): formats.append(f) self._sort_formats(formats) - subtitles = {} - for entry in video_captions['data']: - caption_url = 'https://%s%s' % (host, entry['captionPath']) - subtitles[entry['language']['id']] = [{ - 'url': caption_url - }] + subtitles = self.extract_subtitles(host, video_id) def account_data(field): return try_get(video, lambda x: x['account'][field], compat_str) From 64186d344a12f7f11d71ccab695d444d53b8e347 Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Wed, 8 Jan 2020 11:38:15 +1100 Subject: [PATCH 10/28] [PeerTube] Download description after title is set --- youtube_dl/extractor/peertube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index d1183cee6..19328729d 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -505,11 +505,11 @@ class PeerTubeIE(InfoExtractor): video = self._download_json( 'https://%s/api/v1/videos/%s' % (host, video_id), video_id) + title = video['name'] + video_description = self._download_json( 'https://%s/api/v1/videos/%s/description' % (host, video_id), video_id, fatal=False) - title = video['name'] - formats = [] for file_ in video['files']: if not isinstance(file_, dict): From 66f07dcd4178ca3807cf6a093d26e0a24d1f8216 Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Wed, 8 Jan 2020 12:45:21 +1100 Subject: [PATCH 11/28] [PeerTube] Improve safety of dictionary access This could probably be more elegant. --- youtube_dl/extractor/peertube.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 19328729d..e4fc78695 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -490,11 +490,16 @@ class PeerTubeIE(InfoExtractor): 'https://%s/api/v1/videos/%s/captions' % (host, video_id), video_id) subtitles = {} - for entry in video_captions['data']: - caption_url = 'https://%s%s' % (host, entry['captionPath']) - subtitles[entry['language']['id']] = [{ - 'url': caption_url - }] + for entry in video_captions.get('data'): + captions_language = entry.get('language') + if captions_language is not None: + language_id = captions_language.get('id') + caption_path = entry.get('captionPath') + if language_id is not None and caption_path is not None: + caption_url = 'https://%s%s' % (host, caption_path) + subtitles[language_id] = [{ + 'url': caption_url + }] return subtitles def _real_extract(self, url): From b31f3dc2dcc412dab8b5c21b64b6dbef0e51d38b Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Thu, 9 Jan 2020 12:41:44 +1100 Subject: [PATCH 12/28] Allow for multiple captions for each language This may eventually be needed in future releases of PeerTube --- youtube_dl/extractor/peertube.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index e4fc78695..e24b5c7fd 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -497,9 +497,14 @@ class PeerTubeIE(InfoExtractor): caption_path = entry.get('captionPath') if language_id is not None and caption_path is not None: caption_url = 'https://%s%s' % (host, caption_path) - subtitles[language_id] = [{ - 'url': caption_url - }] + if language_id in subtitles: + subtitles[language_id].append({ + 'url': caption_url + }) + else: + subtitles[language_id] = [{ + 'url': caption_url + }] return subtitles def _real_extract(self, url): From 05a1a553b952b5bc762760f7def99f4073b79eee Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Thu, 9 Jan 2020 12:53:44 +1100 Subject: [PATCH 13/28] [PeerTube] Make caption metadata download non-fatal --- youtube_dl/extractor/peertube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index e24b5c7fd..44dc2e489 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -487,7 +487,7 @@ class PeerTubeIE(InfoExtractor): def _get_subtitles(self, host, video_id): video_captions = self._download_json( - 'https://%s/api/v1/videos/%s/captions' % (host, video_id), video_id) + 'https://%s/api/v1/videos/%s/captions' % (host, video_id), video_id, fatal=False) subtitles = {} for entry in video_captions.get('data'): From d96b295a32c384ebc73e21b499dd8426c1b085d0 Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Thu, 9 Jan 2020 13:12:12 +1100 Subject: [PATCH 14/28] [PeerTube] Add various safety precautions --- youtube_dl/extractor/peertube.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 44dc2e489..b7bbc09bd 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( int_or_none, + str_or_none, parse_resolution, try_get, unified_timestamp, @@ -491,20 +492,18 @@ class PeerTubeIE(InfoExtractor): subtitles = {} for entry in video_captions.get('data'): - captions_language = entry.get('language') - if captions_language is not None: - language_id = captions_language.get('id') - caption_path = entry.get('captionPath') - if language_id is not None and caption_path is not None: - caption_url = 'https://%s%s' % (host, caption_path) - if language_id in subtitles: - subtitles[language_id].append({ - 'url': caption_url - }) - else: - subtitles[language_id] = [{ - 'url': caption_url - }] + language_id = try_get(entry, lambda x: x['language']['id'], compat_str) + caption_path = str_or_none(entry.get('captionPath')) + if language_id and caption_path: + caption_url = 'https://%s%s' % (host, caption_path) + if language_id in subtitles: + subtitles[language_id].append({ + 'url': caption_url + }) + else: + subtitles[language_id] = [{ + 'url': caption_url + }] return subtitles def _real_extract(self, url): @@ -515,13 +514,13 @@ class PeerTubeIE(InfoExtractor): video = self._download_json( 'https://%s/api/v1/videos/%s' % (host, video_id), video_id) - title = video['name'] + title = video.get('name') video_description = self._download_json( 'https://%s/api/v1/videos/%s/description' % (host, video_id), video_id, fatal=False) formats = [] - for file_ in video['files']: + for file_ in video.get('files'): if not isinstance(file_, dict): continue file_url = url_or_none(file_.get('fileUrl')) From 1e8781843f103d1a082b681a48795131035f50fd Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Thu, 9 Jan 2020 13:19:43 +1100 Subject: [PATCH 15/28] [PeerTube] Add channel metadata fields --- youtube_dl/extractor/peertube.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index b7bbc09bd..fee5e13de 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -543,6 +543,9 @@ class PeerTubeIE(InfoExtractor): def account_data(field): return try_get(video, lambda x: x['account'][field], compat_str) + def channel_data(field): + return try_get(video, lambda x: x['channel'][field], compat_str) + category = try_get(video, lambda x: x['category']['label'], compat_str) categories = [category] if category else None @@ -561,6 +564,9 @@ class PeerTubeIE(InfoExtractor): 'uploader': account_data('displayName'), 'uploader_id': '%s@%s' % (account_data('name'), account_data('host')), 'uploader_url': account_data('url'), + 'channel': channel_data('displayName'), + 'channel_id': '%s@%s' % (channel_data('name'), channel_data('host')), + 'channel_url': channel_data('url'), 'language': try_get( video, lambda x: x['language']['id'], compat_str), 'license': try_get( From 39af4a84b0d92579f6f530f4f217b18b9af6e360 Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Thu, 9 Jan 2020 13:40:54 +1100 Subject: [PATCH 16/28] [PeerTube] Improve safety of description extraction --- youtube_dl/extractor/peertube.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index fee5e13de..9bb9e88e2 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -516,9 +516,6 @@ class PeerTubeIE(InfoExtractor): title = video.get('name') - video_description = self._download_json( - 'https://%s/api/v1/videos/%s/description' % (host, video_id), video_id, fatal=False) - formats = [] for file_ in video.get('files'): if not isinstance(file_, dict): @@ -538,6 +535,13 @@ class PeerTubeIE(InfoExtractor): formats.append(f) self._sort_formats(formats) + video_description = self._download_json( + 'https://%s/api/v1/videos/%s/description' % (host, video_id), video_id, fatal=False) + + description = "" + if video_description: + description = video_description.get('description') + subtitles = self.extract_subtitles(host, video_id) def account_data(field): @@ -558,7 +562,7 @@ class PeerTubeIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'description': video_description.get('description'), + 'description': description, 'thumbnail': urljoin(url, video.get('thumbnailPath')), 'timestamp': unified_timestamp(video.get('publishedAt')), 'uploader': account_data('displayName'), From 1ff43a42a3c9f95d88f0fc9beaef54af3ee69f57 Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Sat, 11 Jan 2020 12:55:13 +1100 Subject: [PATCH 17/28] [PeerTube] Revert use of get() for title, files --- youtube_dl/extractor/peertube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 9bb9e88e2..9f5861dff 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -514,10 +514,10 @@ class PeerTubeIE(InfoExtractor): video = self._download_json( 'https://%s/api/v1/videos/%s' % (host, video_id), video_id) - title = video.get('name') + title = video['name'] formats = [] - for file_ in video.get('files'): + for file_ in video['files']: if not isinstance(file_, dict): continue file_url = url_or_none(file_.get('fileUrl')) From d3d850f0e129d7c36702d19d4f9076f2618b536b Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Sat, 11 Jan 2020 12:56:21 +1100 Subject: [PATCH 18/28] [PeerTube] Fix out-of-order imports --- youtube_dl/extractor/peertube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 9f5861dff..87c7fdf15 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -7,8 +7,8 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( int_or_none, - str_or_none, parse_resolution, + str_or_none, try_get, unified_timestamp, url_or_none, From 7ea039df340f68086e221e68c4acd35f6f50f587 Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Sat, 11 Jan 2020 13:13:05 +1100 Subject: [PATCH 19/28] [PeerTube] Handle case where captions page does not exist --- youtube_dl/extractor/peertube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 87c7fdf15..1b58a859b 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -489,6 +489,8 @@ class PeerTubeIE(InfoExtractor): def _get_subtitles(self, host, video_id): video_captions = self._download_json( 'https://%s/api/v1/videos/%s/captions' % (host, video_id), video_id, fatal=False) + if not video_captions: + return None subtitles = {} for entry in video_captions.get('data'): From 3d3b91554afd335ac89a7062a51932a454df262d Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Sat, 11 Jan 2020 13:15:17 +1100 Subject: [PATCH 20/28] [PeerTube] Use single-quotes instead of double quotes in tests --- youtube_dl/extractor/peertube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 1b58a859b..cc4c8bd5c 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -443,8 +443,8 @@ class PeerTubeIE(InfoExtractor): 'view_count': int, 'like_count': int, 'dislike_count': int, - 'tags': ["framasoft", "peertube"], - 'categories': ["Science & Technology"], + 'tags': ['framasoft', 'peertube'], + 'categories': ['Science & Technology'], } }, { 'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44', From 37a15bc93ba94a0ba455c5910d0c3c4a0bdd9f75 Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Sat, 11 Jan 2020 14:31:05 +1100 Subject: [PATCH 21/28] [PeerTube] Refactor subtitles extractor --- youtube_dl/extractor/peertube.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index cc4c8bd5c..d7f48b646 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -498,14 +498,11 @@ class PeerTubeIE(InfoExtractor): caption_path = str_or_none(entry.get('captionPath')) if language_id and caption_path: caption_url = 'https://%s%s' % (host, caption_path) - if language_id in subtitles: - subtitles[language_id].append({ + caption_dict = { 'url': caption_url - }) - else: - subtitles[language_id] = [{ - 'url': caption_url - }] + } + if subtitles.setdefault(language_id, [caption_dict]) != [caption_dict]: + subtitles[language_id].append(caption_dict) return subtitles def _real_extract(self, url): From 7057804c9450ee25f48e2b9fc8d720f0959e0679 Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Sat, 11 Jan 2020 15:39:13 +1100 Subject: [PATCH 22/28] [PeerTube] Check captions and desc are dict before accessing --- youtube_dl/extractor/peertube.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index d7f48b646..b163ee7a6 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -489,7 +489,7 @@ class PeerTubeIE(InfoExtractor): def _get_subtitles(self, host, video_id): video_captions = self._download_json( 'https://%s/api/v1/videos/%s/captions' % (host, video_id), video_id, fatal=False) - if not video_captions: + if not isinstance(video_captions, dict): return None subtitles = {} @@ -537,10 +537,10 @@ class PeerTubeIE(InfoExtractor): video_description = self._download_json( 'https://%s/api/v1/videos/%s/description' % (host, video_id), video_id, fatal=False) - description = "" - if video_description: + description = None + if isinstance(video_description, dict): description = video_description.get('description') - + subtitles = self.extract_subtitles(host, video_id) def account_data(field): From 74026525e35f82138391d15123a3e8652a552dc5 Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Sat, 11 Jan 2020 16:01:12 +1100 Subject: [PATCH 23/28] [PeerTube] Refactor code to avoid repetition --- youtube_dl/extractor/peertube.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index b163ee7a6..d638d05c4 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -499,8 +499,8 @@ class PeerTubeIE(InfoExtractor): if language_id and caption_path: caption_url = 'https://%s%s' % (host, caption_path) caption_dict = { - 'url': caption_url - } + 'url': caption_url + } if subtitles.setdefault(language_id, [caption_dict]) != [caption_dict]: subtitles[language_id].append(caption_dict) return subtitles @@ -540,16 +540,26 @@ class PeerTubeIE(InfoExtractor): description = None if isinstance(video_description, dict): description = video_description.get('description') - + subtitles = self.extract_subtitles(host, video_id) + def try_get_second_level_data(section, field): + return try_get(video, lambda x: x[section][field], compat_str) + def account_data(field): - return try_get(video, lambda x: x['account'][field], compat_str) + return try_get_second_level_data('account', field) def channel_data(field): - return try_get(video, lambda x: x['channel'][field], compat_str) + return try_get_second_level_data('channel', field) + + def make_id_string(name_field, host_field): + name = str_or_none(name_field) + host = str_or_none(host_field) + if name and host: + return '%s@%s' % (name, host) + return None - category = try_get(video, lambda x: x['category']['label'], compat_str) + category = try_get_second_level_data('category', 'label') categories = [category] if category else None nsfw = video.get('nsfw') @@ -565,15 +575,13 @@ class PeerTubeIE(InfoExtractor): 'thumbnail': urljoin(url, video.get('thumbnailPath')), 'timestamp': unified_timestamp(video.get('publishedAt')), 'uploader': account_data('displayName'), - 'uploader_id': '%s@%s' % (account_data('name'), account_data('host')), + 'uploader_id': make_id_string(account_data('name'), account_data('host')), 'uploader_url': account_data('url'), 'channel': channel_data('displayName'), - 'channel_id': '%s@%s' % (channel_data('name'), channel_data('host')), + 'channel_id': make_id_string(channel_data('name'), channel_data('host')), 'channel_url': channel_data('url'), - 'language': try_get( - video, lambda x: x['language']['id'], compat_str), - 'license': try_get( - video, lambda x: x['licence']['label'], compat_str), + 'language': try_get_second_level_data('language', 'id'), + 'license': try_get_second_level_data('licence', 'label'), 'duration': int_or_none(video.get('duration')), 'view_count': int_or_none(video.get('views')), 'like_count': int_or_none(video.get('likes')), From c273a16b1078cfcbdbf3012a823b89b871a194ff Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Sat, 11 Jan 2020 16:08:06 +1100 Subject: [PATCH 24/28] [PeerTube] Fix incorrectly calculated MD5 checksum in test --- youtube_dl/extractor/peertube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index d638d05c4..58287cf33 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -425,7 +425,7 @@ class PeerTubeIE(InfoExtractor): ''' % (_INSTANCES_RE, _UUID_RE) _TESTS = [{ 'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d', - 'md5': '9618b916a40563adfd9b04f3ec12e79b', + 'md5': '9bed8c0137913e17b86334e5885aacff', 'info_dict': { 'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d', 'ext': 'mp4', From f22450754861557892368dc03750a79c3dceffa9 Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Mon, 20 Jan 2020 22:32:03 +1100 Subject: [PATCH 25/28] [PeerTube] Fix ids, add channel tests --- youtube_dl/extractor/peertube.py | 42 +++++++++++++++----------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 58287cf33..435a2bd71 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -435,8 +435,11 @@ class PeerTubeIE(InfoExtractor): 'timestamp': 1538391166, 'upload_date': '20181001', 'uploader': 'Framasoft', - 'uploader_id': 'framasoft@framatube.org', + 'uploader_id': 3, 'uploader_url': 'https://framatube.org/accounts/framasoft', + 'channel': 'Les vidéos de Framasoft', + 'channel_id': 2, + 'channel_url': 'https://framatube.org/video-channels/bf54d359-cfad-4935-9d45-9d6be93f63e8', 'language': 'en', 'license': 'Attribution - Share Alike', 'duration': 113, @@ -543,23 +546,16 @@ class PeerTubeIE(InfoExtractor): subtitles = self.extract_subtitles(host, video_id) - def try_get_second_level_data(section, field): - return try_get(video, lambda x: x[section][field], compat_str) + def try_get_second_level_data(section, field, type_): + return try_get(video, lambda x: x[section][field], type_) - def account_data(field): - return try_get_second_level_data('account', field) + def account_data(field, type_): + return try_get_second_level_data('account', field, type_) - def channel_data(field): - return try_get_second_level_data('channel', field) + def channel_data(field, type_): + return try_get_second_level_data('channel', field, type_) - def make_id_string(name_field, host_field): - name = str_or_none(name_field) - host = str_or_none(host_field) - if name and host: - return '%s@%s' % (name, host) - return None - - category = try_get_second_level_data('category', 'label') + category = try_get_second_level_data('category', 'label', compat_str) categories = [category] if category else None nsfw = video.get('nsfw') @@ -574,14 +570,14 @@ class PeerTubeIE(InfoExtractor): 'description': description, 'thumbnail': urljoin(url, video.get('thumbnailPath')), 'timestamp': unified_timestamp(video.get('publishedAt')), - 'uploader': account_data('displayName'), - 'uploader_id': make_id_string(account_data('name'), account_data('host')), - 'uploader_url': account_data('url'), - 'channel': channel_data('displayName'), - 'channel_id': make_id_string(channel_data('name'), channel_data('host')), - 'channel_url': channel_data('url'), - 'language': try_get_second_level_data('language', 'id'), - 'license': try_get_second_level_data('licence', 'label'), + 'uploader': account_data('displayName', compat_str), + 'uploader_id': account_data('id', int), + 'uploader_url': url_or_none(account_data('url', compat_str)), + 'channel': channel_data('displayName', compat_str), + 'channel_id': channel_data('id', int), + 'channel_url': url_or_none(channel_data('url', compat_str)), + 'language': try_get_second_level_data('language', 'id', compat_str), + 'license': try_get_second_level_data('licence', 'label', compat_str), 'duration': int_or_none(video.get('duration')), 'view_count': int_or_none(video.get('views')), 'like_count': int_or_none(video.get('likes')), From 28dea4b6380bfe32d2dc8c52fceecd02619ba952 Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Tue, 18 Feb 2020 12:17:59 +1100 Subject: [PATCH 26/28] [PeerTube] Improve code quality of captions extractor --- youtube_dl/extractor/peertube.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 435a2bd71..32df14be3 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -500,12 +500,10 @@ class PeerTubeIE(InfoExtractor): language_id = try_get(entry, lambda x: x['language']['id'], compat_str) caption_path = str_or_none(entry.get('captionPath')) if language_id and caption_path: - caption_url = 'https://%s%s' % (host, caption_path) - caption_dict = { - 'url': caption_url - } - if subtitles.setdefault(language_id, [caption_dict]) != [caption_dict]: - subtitles[language_id].append(caption_dict) + caption_url = urljoin('https://%s' % host, entry.get('captionPath')) + subtitles.setdefault(language_id, []).append({ + 'url': caption_url, + }) return subtitles def _real_extract(self, url): From f0542cf7563833d469b05c1bea12ca99140eb997 Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Tue, 18 Feb 2020 12:22:42 +1100 Subject: [PATCH 27/28] [PeerTube] Enforce string type for description, ids --- youtube_dl/extractor/peertube.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 32df14be3..519d5fc5a 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -435,10 +435,10 @@ class PeerTubeIE(InfoExtractor): 'timestamp': 1538391166, 'upload_date': '20181001', 'uploader': 'Framasoft', - 'uploader_id': 3, + 'uploader_id': '3', 'uploader_url': 'https://framatube.org/accounts/framasoft', 'channel': 'Les vidéos de Framasoft', - 'channel_id': 2, + 'channel_id': '2', 'channel_url': 'https://framatube.org/video-channels/bf54d359-cfad-4935-9d45-9d6be93f63e8', 'language': 'en', 'license': 'Attribution - Share Alike', @@ -540,7 +540,7 @@ class PeerTubeIE(InfoExtractor): description = None if isinstance(video_description, dict): - description = video_description.get('description') + description = str_or_none(video_description.get('description')) subtitles = self.extract_subtitles(host, video_id) @@ -569,10 +569,10 @@ class PeerTubeIE(InfoExtractor): 'thumbnail': urljoin(url, video.get('thumbnailPath')), 'timestamp': unified_timestamp(video.get('publishedAt')), 'uploader': account_data('displayName', compat_str), - 'uploader_id': account_data('id', int), + 'uploader_id': str(account_data('id', int)), 'uploader_url': url_or_none(account_data('url', compat_str)), 'channel': channel_data('displayName', compat_str), - 'channel_id': channel_data('id', int), + 'channel_id': str(channel_data('id', int)), 'channel_url': url_or_none(channel_data('url', compat_str)), 'language': try_get_second_level_data('language', 'id', compat_str), 'license': try_get_second_level_data('licence', 'label', compat_str), From d1af4dafab5e6660028da3e4647f1d34763f4425 Mon Sep 17 00:00:00 2001 From: 3risian <59593325+3risian@users.noreply.github.com> Date: Thu, 20 Feb 2020 13:27:22 +1100 Subject: [PATCH 28/28] [PeerTube] Rename try_get_second_level_data() to data() --- youtube_dl/extractor/peertube.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 519d5fc5a..307712196 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -544,16 +544,16 @@ class PeerTubeIE(InfoExtractor): subtitles = self.extract_subtitles(host, video_id) - def try_get_second_level_data(section, field, type_): + def data(section, field, type_): return try_get(video, lambda x: x[section][field], type_) def account_data(field, type_): - return try_get_second_level_data('account', field, type_) + return data('account', field, type_) def channel_data(field, type_): - return try_get_second_level_data('channel', field, type_) + return data('channel', field, type_) - category = try_get_second_level_data('category', 'label', compat_str) + category = data('category', 'label', compat_str) categories = [category] if category else None nsfw = video.get('nsfw') @@ -574,8 +574,8 @@ class PeerTubeIE(InfoExtractor): 'channel': channel_data('displayName', compat_str), 'channel_id': str(channel_data('id', int)), 'channel_url': url_or_none(channel_data('url', compat_str)), - 'language': try_get_second_level_data('language', 'id', compat_str), - 'license': try_get_second_level_data('licence', 'label', compat_str), + 'language': data('language', 'id', compat_str), + 'license': data('licence', 'label', compat_str), 'duration': int_or_none(video.get('duration')), 'view_count': int_or_none(video.get('views')), 'like_count': int_or_none(video.get('likes')),