From dd82ffea0c3a0dcf67f8e9fca7226de3a2899425 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 2 Jul 2013 10:08:58 +0200 Subject: [PATCH 1/6] Implement format selection in YoutubeDL Now the IEs can set a formats field in the info_dict, with the formats ordered from worst to best quality. It's a list of dicts with the following fields: * Mandatory: url and ext * Optional: format and format_id The format_id is used for choosing which formats have to be downloaded. Now a video result is processed by the method process_video_result. --- youtube_dl/YoutubeDL.py | 80 +++++++++++++++++++++++++++++++++++++---- youtube_dl/__init__.py | 2 +- 2 files changed, 74 insertions(+), 8 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e85e03fa4..feb105861 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -385,13 +385,7 @@ class YoutubeDL(object): result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system if result_type == 'video': ie_result.update(extra_info) - if 'playlist' not in ie_result: - # It isn't part of a playlist - ie_result['playlist'] = None - ie_result['playlist_index'] = None - if download: - self.process_info(ie_result) - return ie_result + return self.process_video_result(ie_result) elif result_type == 'url': # We have to add extra_info to the results because it may be # contained in a playlist @@ -449,6 +443,64 @@ class YoutubeDL(object): else: raise Exception('Invalid result type: %s' % result_type) + def process_video_result(self, info_dict, download=True): + assert info_dict.get('_type', 'video') == 'video' + + if 'playlist' not in info_dict: + # It isn't part of a playlist + info_dict['playlist'] = None + info_dict['playlist_index'] = None + + # We now pick which formats have to be downloaded + if info_dict.get('formats') is None: + # There's only one format available + formats = [info_dict] + else: + formats = info_dict['formats'] + + # We check that all the formats have the format and format_id fields + for (i, format) in enumerate(formats): + if format.get('format') is None: + format['format'] = compat_str(i) + if format.get('format_id') is None: + format['format_id'] = compat_str(i) + + if self.params.get('listformats', None): + self.list_formats(info_dict) + return + + req_format = self.params.get('format', 'best') + formats_to_download = [] + if req_format == 'best' or req_format is None: + formats_to_download = [formats[-1]] + elif req_format == 'worst': + formats_to_download = [formats[0]] + # The -1 is for supporting YoutubeIE + elif req_format in ('-1', 'all'): + formats_to_download = formats + else: + # We can accept formats requestd in the format: 34/10/5, we pick + # the first that is availble, starting from left + req_formats = req_format.split('/') + for rf in req_formats: + matches = filter(lambda f:f['format_id'] == rf ,formats) + if matches: + formats_to_download = [matches[0]] + break + if not formats_to_download: + raise ExtractorError(u'requested format not available') + + if download: + if len(formats_to_download) > 1: + self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download))) + for format in formats_to_download: + new_info = dict(info_dict) + new_info.update(format) + self.process_info(new_info) + # We update the info dict with the best quality format (backwards compatibility) + info_dict.update(formats_to_download[-1]) + return info_dict + def process_info(self, info_dict): """Process a single resolved IE result.""" @@ -655,3 +707,17 @@ class YoutubeDL(object): vid_id = info_dict['extractor'] + u' ' + info_dict['id'] with locked_file(fn, 'a', encoding='utf-8') as archive_file: archive_file.write(vid_id + u'\n') + + def list_formats(self, info_dict): + formats_s = [] + for format in info_dict.get('formats', [info_dict]): + formats_s.append("%s\t:\t%s\t[%s]" % (format['format_id'], + format['ext'], + format.get('format', '???'), + ) + ) + if len(formats_s) != 1: + formats_s[0] += ' (worst)' + formats_s[-1] += ' (best)' + formats_s = "\n".join(formats_s) + self.to_screen(u"[info] Available formats for %s:\nformat code\textension\n%s" % (info_dict['id'], formats_s)) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 3513d719f..bc8e97250 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -208,7 +208,7 @@ def parseOpts(overrideArguments=None): video_format.add_option('-f', '--format', - action='store', dest='format', metavar='FORMAT', + action='store', dest='format', metavar='FORMAT', default='best', help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported') video_format.add_option('--all-formats', action='store_const', dest='format', help='download all available video formats', const='all') From 99e206d508646b183ef315da162147ed6fd75442 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 8 Jul 2013 12:10:47 +0200 Subject: [PATCH 2/6] Implement the max quality option in YoutubeDL --- youtube_dl/YoutubeDL.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index feb105861..d88378dda 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -469,6 +469,10 @@ class YoutubeDL(object): self.list_formats(info_dict) return + format_limit = self.params.get('format_limit', None) + if format_limit: + formats = [f for f in formats if f['format_id'] <= format_limit] + req_format = self.params.get('format', 'best') formats_to_download = [] if req_format == 'best' or req_format is None: From 6ff000b888a3da702a894addd9f9824139fd8c8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 13 Jul 2013 17:51:26 +0200 Subject: [PATCH 3/6] Do not handle format selection for IEs that already handle it --- youtube_dl/YoutubeDL.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d88378dda..c6235abd3 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -451,6 +451,11 @@ class YoutubeDL(object): info_dict['playlist'] = None info_dict['playlist_index'] = None + # This extractors handle format selection themselves + if info_dict['extractor'] in [u'youtube', u'Youku', u'YouPorn', u'mixcloud']: + self.process_info(info_dict) + return info_dict + # We now pick which formats have to be downloaded if info_dict.get('formats') is None: # There's only one format available From 79819f58f2328cdb08272c55d01965cd8c6624ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 13 Jul 2013 18:19:37 +0200 Subject: [PATCH 4/6] Default 'format' field to {width}x{height} If width is None, use {height}p and if height is None, '???' --- youtube_dl/YoutubeDL.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c6235abd3..829a70ec9 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -466,9 +466,16 @@ class YoutubeDL(object): # We check that all the formats have the format and format_id fields for (i, format) in enumerate(formats): if format.get('format') is None: - format['format'] = compat_str(i) + if format.get('height') is not None: + if format.get('width') is not None: + format_desc = u'%sx%s' % (format['width'], format['height']) + else: + format_desc = u'%sp' % format['height'] + else: + format_desc = compat_str(i) + format['format'] = format_desc if format.get('format_id') is None: - format['format_id'] = compat_str(i) + format['format_id'] = '???' if self.params.get('listformats', None): self.list_formats(info_dict) From e028d0d1e3ffed0a323b41431dbbfc804aa9553e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 14 Jul 2013 17:24:18 +0200 Subject: [PATCH 5/6] Implement the prefer_free_formats in YoutubeDL --- test/test_YoutubeDL.py | 49 +++++++++++++++++++++++++++++++++++++++++ youtube_dl/YoutubeDL.py | 9 ++++++++ 2 files changed, 58 insertions(+) create mode 100644 test/test_YoutubeDL.py diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py new file mode 100644 index 000000000..2b9fb92ee --- /dev/null +++ b/test/test_YoutubeDL.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python + +import sys +import unittest + +# Allow direct execution +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from helper import FakeYDL, parameters + +class YDL(FakeYDL): + def __init__(self): + super(YDL, self).__init__() + self.downloaded_info_dicts = [] + def process_info(self, info_dict): + self.downloaded_info_dicts.append(info_dict) + +class TestFormatSelection(unittest.TestCase): + def test_prefer_free_formats(self): + # Same resolution => download webm + ydl = YDL() + ydl.params['prefer_free_formats'] = True + formats = [{u'ext': u'webm', u'height': 460},{u'ext': u'mp4', u'height': 460}] + info_dict = {u'formats': formats, u'extractor': u'test'} + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded[u'ext'], u'webm') + + # Different resolution => download best quality (mp4) + ydl = YDL() + ydl.params['prefer_free_formats'] = True + formats = [{u'ext': u'webm', u'height': 720},{u'ext': u'mp4',u'height': 1080}] + info_dict[u'formats'] = formats + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded[u'ext'], u'mp4') + + # No prefer_free_formats => keep original formats order + ydl = YDL() + ydl.params['prefer_free_formats'] = False + formats = [{u'ext': u'webm', u'height': 720},{u'ext': u'flv',u'height': 720}] + info_dict[u'formats'] = formats + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded[u'ext'], u'flv') + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 829a70ec9..e159aa336 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -484,6 +484,15 @@ class YoutubeDL(object): format_limit = self.params.get('format_limit', None) if format_limit: formats = [f for f in formats if f['format_id'] <= format_limit] + if self.params.get('prefer_free_formats'): + def _free_formats_key(f): + try: + ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext']) + except ValueError: + ext_ord = -1 + # We only compare the extension if they have the same height and width + return (f.get('height'), f.get('width'), ext_ord) + formats = sorted(formats, key=_free_formats_key) req_format = self.params.get('format', 'best') formats_to_download = [] From 8016c9229718080f5211b9f9da176992622b30e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 14 Jul 2013 17:31:52 +0200 Subject: [PATCH 6/6] Fix the default values of format_id and format --- youtube_dl/YoutubeDL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e159aa336..a32e50772 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -472,10 +472,10 @@ class YoutubeDL(object): else: format_desc = u'%sp' % format['height'] else: - format_desc = compat_str(i) + format_desc = '???' format['format'] = format_desc if format.get('format_id') is None: - format['format_id'] = '???' + format['format_id'] = compat_str(i) if self.params.get('listformats', None): self.list_formats(info_dict)