From 79d2077edc9c4ac8c71b672d8eff06ef847842c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 15 Mar 2019 00:42:14 +0700 Subject: [PATCH] [extractor/common] Fix url meta field for unfragmented DASH formats (closes #20346) --- youtube_dl/extractor/common.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index dfd0584d3..c291bc1df 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -108,10 +108,13 @@ class InfoExtractor(object): for RTMP - RTMP URL, for HLS - URL of the M3U8 media playlist, for HDS - URL of the F4M manifest, - for DASH - URL of the MPD manifest or - base URL representing the media - if MPD manifest is parsed from - a string, + for DASH + - HTTP URL to plain file media (in case of + unfragmented media) + - URL of the MPD manifest or base URL + representing the media if MPD manifest + is parsed from a string (in case of + fragmented media) for MSS - URL of the ISM manifest. * manifest_url The URL of the manifest file in case of @@ -2137,8 +2140,6 @@ class InfoExtractor(object): bandwidth = int_or_none(representation_attrib.get('bandwidth')) f = { 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, - # NB: mpd_url may be empty when MPD manifest is parsed from a string - 'url': mpd_url or base_url, 'manifest_url': mpd_url, 'ext': mimetype2ext(mime_type), 'width': int_or_none(representation_attrib.get('width')), @@ -2277,10 +2278,14 @@ class InfoExtractor(object): fragment['duration'] = segment_duration fragments.append(fragment) representation_ms_info['fragments'] = fragments - # NB: MPD manifest may contain direct URLs to unfragmented media. - # No fragments key is present in this case. + # If there is a fragments key available then we correctly recognized fragmented media. + # Otherwise we will assume unfragmented media with direct access. 
Technically, such an + # assumption is not necessarily correct since we may simply have no support for + # some forms of fragmented media renditions yet, but for now we'll use this fallback. if 'fragments' in representation_ms_info: f.update({ + # NB: mpd_url may be empty when MPD manifest is parsed from a string + 'url': mpd_url or base_url, 'fragment_base_url': base_url, 'fragments': [], 'protocol': 'http_dash_segments', @@ -2291,6 +2296,10 @@ class InfoExtractor(object): f['url'] = initialization_url f['fragments'].append({location_key(initialization_url): initialization_url}) f['fragments'].extend(representation_ms_info['fragments']) + else: + # Assuming direct URL to unfragmented media. + f['url'] = base_url + # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation # is not necessarily unique within a Period thus formats with # the same `format_id` are quite possible. There are numerous examples