From 1ad6dd845b4af1d238b5bc3308cbe47060e1ab7b Mon Sep 17 00:00:00 2001 From: dirkf Date: Tue, 6 Oct 2020 17:31:06 +0100 Subject: [PATCH 1/5] Try all possible mediaselectors even if one succeeds Some mediaselectors may be parseable but have no formats (eg captions only). This revision tries all possible mediaselectors and assembles the formats and subtitles found. If any formats are found, the known exceptions ('notukerror', 'geolocation', 'selectionunavailable') are not propagated. --- youtube_dl/extractor/bbc.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 002c39c39..ca0634e6c 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -350,16 +350,26 @@ class BBCCoUkIE(InfoExtractor): def _download_media_selector(self, programme_id): last_exception = None + formats = [] + subtitles = [] + # as some mediaselectors may be parseable but have + # no formats (eg captions only), try all possible + # mediaselectors for mediaselector_url in self._MEDIASELECTOR_URLS: try: - return self._download_media_selector_url( + formatsAndSubtitles = self._download_media_selector_url( mediaselector_url % programme_id, programme_id) + formats += formatsAndSubtitles[0] + subtitles += formatsAndSubtitles[1] except BBCCoUkIE.MediaSelectionError as e: if e.id in ('notukerror', 'geolocation', 'selectionunavailable'): last_exception = e continue self._raise_extractor_error(e) - self._raise_extractor_error(last_exception) + # ignore a trapped exception if formats were found + if last_exception and not formats: + self._raise_extractor_error(last_exception) + return formats, subtitles def _download_media_selector_url(self, url, programme_id=None): media_selection = self._download_xml( From f9588ec2cd14090d762e0e6d96e6bda896ef4b8f Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 7 Oct 2020 02:56:27 +0100 Subject: [PATCH 2/5] Fix for flake8 Trailing spaces in comments FFS! 
--- youtube_dl/extractor/bbc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index ca0634e6c..a67ced3f6 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -352,7 +352,7 @@ class BBCCoUkIE(InfoExtractor): last_exception = None formats = [] subtitles = [] - # as some mediaselectors may be parseable but have + # as some mediaselectors may be parseable but have # no formats (eg captions only), try all possible # mediaselectors for mediaselector_url in self._MEDIASELECTOR_URLS: @@ -366,7 +366,7 @@ class BBCCoUkIE(InfoExtractor): last_exception = e continue self._raise_extractor_error(e) - # ignore a trapped exception if formats were found + # ignore a trapped exception if formats were found if last_exception and not formats: self._raise_extractor_error(last_exception) return formats, subtitles From 94f4d75bf644c8066ee27be36fbe76225517f9db Mon Sep 17 00:00:00 2001 From: dirkf Date: Tue, 13 Oct 2020 10:49:27 +0000 Subject: [PATCH 3/5] Update bbc.py The list of subtitles is initialised to None and may never become a list if no captions are found in the playlists. 
--- youtube_dl/extractor/bbc.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index a67ced3f6..76dfa0598 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -359,8 +359,12 @@ class BBCCoUkIE(InfoExtractor): try: formatsAndSubtitles = self._download_media_selector_url( mediaselector_url % programme_id, programme_id) - formats += formatsAndSubtitles[0] - subtitles += formatsAndSubtitles[1] + # formats should always be set, but just in case + if formatsAndSubtitles[0]: + formats += formatsAndSubtitles[0] + # subtitles may never be set + if formatsAndSubtitles[1]: + subtitles += formatsAndSubtitles[1] except BBCCoUkIE.MediaSelectionError as e: if e.id in ('notukerror', 'geolocation', 'selectionunavailable'): last_exception = e From 9b74c4b7db073835ac5eaf53da9312258540e6d9 Mon Sep 17 00:00:00 2001 From: dirkf Date: Tue, 13 Oct 2020 13:38:45 +0000 Subject: [PATCH 4/5] Handle subtitles properly The subtitles are parsed from each mediaselector playlist in the form of a dict keyed by language each of whose values is a list containing a dict keyed by url and sttl type. There seems to be no attempt in the extractor to make this list contain more than one dict. So a strategy is needed to deal with multiple sttl types for the same language. For backward compatibility, let the first of any duplicate sttl languages take precedence. 
--- youtube_dl/extractor/bbc.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 76dfa0598..754516b6a 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -359,12 +359,17 @@ class BBCCoUkIE(InfoExtractor): try: formatsAndSubtitles = self._download_media_selector_url( mediaselector_url % programme_id, programme_id) - # formats should always be set, but just in case + # formats (a list) should always be set, but just in case if formatsAndSubtitles[0]: formats += formatsAndSubtitles[0] - # subtitles may never be set + # subtitles (a dict {(lang,sttl)}) if formatsAndSubtitles[1]: - subtitles += formatsAndSubtitles[1] + if not subtitles: + subtitles = formatsAndSubtitles[1] + else: + # prioritise the first sttl for each lang + formatsAndSubtitles[1].update(subtitles) + subtitles = formatsAndSubtitles[1] except BBCCoUkIE.MediaSelectionError as e: if e.id in ('notukerror', 'geolocation', 'selectionunavailable'): last_exception = e From 7671a9442c7d8ace55cf6cfdc9f3be4bd2304312 Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 14 Oct 2020 11:55:03 +0000 Subject: [PATCH 5/5] Simplify modified subtitle handling --- youtube_dl/extractor/bbc.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 754516b6a..d41271ee5 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -351,7 +351,7 @@ class BBCCoUkIE(InfoExtractor): def _download_media_selector(self, programme_id): last_exception = None formats = [] - subtitles = [] + subtitles = None # as some mediaselectors may be parseable but have # no formats (eg captions only), try all possible # mediaselectors @@ -364,12 +364,10 @@ class BBCCoUkIE(InfoExtractor): formats += formatsAndSubtitles[0] # subtitles (a dict {(lang,sttl)}) if formatsAndSubtitles[1]: - if not subtitles: - subtitles =
formatsAndSubtitles[1] - else: + if subtitles: # prioritise the first sttl for each lang formatsAndSubtitles[1].update(subtitles) - subtitles = formatsAndSubtitles[1] + subtitles = formatsAndSubtitles[1] except BBCCoUkIE.MediaSelectionError as e: if e.id in ('notukerror', 'geolocation', 'selectionunavailable'): last_exception = e