From df8db1aa2107f204fa14c157d7a536e45ceb65c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 26 Feb 2013 23:33:58 +0100 Subject: [PATCH 01/14] Create extract_info method --- youtube_dl/FileDownloader.py | 85 +++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 192ad37d2..b26c34729 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -377,6 +377,44 @@ class FileDownloader(object): if re.search(rejecttitle, title, re.IGNORECASE): return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' return None + + def extract_info(self, url): + ''' + Returns a list with a dictionary for each video we find. + ''' + suitable_found = False + for ie in self._ies: + # Go to next InfoExtractor if not suitable + if not ie.suitable(url): + continue + + # Warn if the _WORKING attribute is False + if not ie.working(): + self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, ' + u'and will probably not work. If you want to go on, use the -i option.') + + # Suitable InfoExtractor found + suitable_found = True + + # Extract information from URL and process it + try: + videos = ie.extract(url) + for video in videos or []: + if not 'extractor' in video: + #The extractor has already been set somewher else + video['extractor'] = ie.IE_NAME + return videos + except ExtractorError as de: # An error we somewhat expected + self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback()) + break + except Exception as e: + if self.params.get('ignoreerrors', False): + self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc())) + break + else: + raise + if not suitable_found: + self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url) def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" @@ -488,49 +526,14 @@ class FileDownloader(object): raise SameFileError(self.params['outtmpl']) for url in url_list: - suitable_found = False - for ie in self._ies: - # Go to next InfoExtractor if not suitable - if not ie.suitable(url): - continue - - # Warn if the _WORKING attribute is False - if not ie.working(): - self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, ' - u'and will probably not work. If you want to go on, use the -i option.') + videos = self.extract_info(url) - # Suitable InfoExtractor found - suitable_found = True - - # Extract information from URL and process it + for video in videos or []: try: - videos = ie.extract(url) - except ExtractorError as de: # An error we somewhat expected - self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback()) - break - except Exception as e: - if self.params.get('ignoreerrors', False): - self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc())) - break - else: - raise - - if len(videos or []) > 1 and self.fixed_template(): - raise SameFileError(self.params['outtmpl']) - - for video in videos or []: - video['extractor'] = ie.IE_NAME - try: - self.increment_downloads() - self.process_info(video) - except UnavailableVideoError: - self.trouble(u'\nERROR: unable to download video') - - # Suitable InfoExtractor had been found; go to next URL - break - - if not suitable_found: - self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url) + self.increment_downloads() + self.process_info(video) + except UnavailableVideoError: + self.trouble(u'\nERROR: unable to download video') return self._download_retcode From 631f73978c0ee851950ac697dfd73f9092abd3c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 4 Mar 2013 22:16:42 +0100 Subject: [PATCH 02/14] Add a method for extracting info from a list of urls --- youtube_dl/FileDownloader.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index b26c34729..f668b362b 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -415,6 +415,14 @@ class FileDownloader(object): raise if not suitable_found: self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url) + def extract_info_iterable(self, urls): + ''' + Return the videos founded for the urls + ''' + results = [] + for url in urls: + results.extend(self.extract_info(url)) + return results def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" From 597cc8a45536aa4207c5ffc3e421fcebf2e08fe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 5 Mar 2013 11:58:01 +0100 Subject: [PATCH 03/14] Use extract_info in YoutubePlaylist and YoutubeSearch --- test/test_youtube_lists.py | 16 +++++++++------- youtube_dl/InfoExtractors.py | 8 +++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index f4705bc5b..055bf69c8 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -10,6 +10,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.InfoExtractors import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE from youtube_dl.utils import * +from youtube_dl.FileDownloader import FileDownloader PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: @@ -22,7 +23,7 @@ proxy_handler = compat_urllib_request.ProxyHandler() opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) compat_urllib_request.install_opener(opener) -class FakeDownloader(object): +class FakeDownloader(FileDownloader): def __init__(self): self.result = [] self.params = parameters @@ -30,15 +31,16 @@ class FakeDownloader(object): print(s) def trouble(self, s): raise Exception(s) - def download(self, x): - self.result.append(x) + def extract_info(self, url): + self.result.append(url) + return url class TestYoutubeLists(unittest.TestCase): def test_youtube_playlist(self): dl = FakeDownloader() ie = YoutubePlaylistIE(dl) ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') - ytie_results = [YoutubeIE()._extract_id(r[0]) for r in dl.result] + ytie_results = [YoutubeIE()._extract_id(url) for url in dl.result] self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE']) def test_issue_673(self): @@ -58,7 +60,7 @@ class TestYoutubeLists(unittest.TestCase): dl = FakeDownloader() ie = YoutubePlaylistIE(dl) ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') - ytie_results = [YoutubeIE()._extract_id(r[0]) for r in dl.result] + ytie_results = [YoutubeIE()._extract_id(url) for url in dl.result] self.assertFalse('pElCt5oNDuI' in ytie_results) self.assertFalse('KdPEApIVdWM' in ytie_results) @@ -67,9 +69,9 @@ class TestYoutubeLists(unittest.TestCase): ie = YoutubePlaylistIE(dl) # TODO find a > 100 (paginating?) videos course ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') - self.assertEqual(YoutubeIE()._extract_id(dl.result[0][0]), 'j9WZyLZCBzs') + self.assertEqual(YoutubeIE()._extract_id(dl.result[0]), 'j9WZyLZCBzs') self.assertEqual(len(dl.result), 25) - self.assertEqual(YoutubeIE()._extract_id(dl.result[-1][0]), 'rYefUsYuEp0') + self.assertEqual(YoutubeIE()._extract_id(dl.result[-1]), 'rYefUsYuEp0') def test_youtube_channel(self): # I give up, please find a channel that does paginate and test this like test_youtube_playlist_long diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 7ce84fe79..8a7694a76 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -1756,9 +1756,7 @@ class YoutubePlaylistIE(InfoExtractor): else: self._downloader.to_screen(u'[youtube] PL %s: Found %i videos, downloading %i' % (playlist_id, total, len(videos))) - for video in videos: - self._downloader.download([video]) - return + return self._downloader.extract_info_iterable(videos) class YoutubeChannelIE(InfoExtractor): @@ -1892,8 +1890,8 @@ class YoutubeUserIE(InfoExtractor): self._downloader.to_screen(u"[youtube] user %s: Collected %d video ids (downloading %d of them)" % (username, all_ids_count, len(video_ids))) - for video_id in video_ids: - self._downloader.download(['http://www.youtube.com/watch?v=%s' % video_id]) + urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] + return self._downloader.extract_info_iterable(urls) class BlipTVUserIE(InfoExtractor): From f6e6da9525150487476d4990693eedf73acffab1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 5 Mar 2013 12:26:18 +0100 Subject: [PATCH 04/14] Use extract_info in BlipTV User and Youtube Channel --- youtube_dl/InfoExtractors.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 8a7694a76..d79f6068f 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -1806,9 +1806,8 @@ class YoutubeChannelIE(InfoExtractor): self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) - for id in video_ids: - self._downloader.download(['http://www.youtube.com/watch?v=%s' % id]) - return + urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids] + return self._downloader.extract_info_iterable(urls) class YoutubeUserIE(InfoExtractor): @@ -1981,8 +1980,8 @@ class BlipTVUserIE(InfoExtractor): self._downloader.to_screen(u"[%s] user %s: Collected %d video ids (downloading %d of them)" % (self.IE_NAME, username, all_ids_count, len(video_ids))) - for video_id in video_ids: - self._downloader.download([u'http://blip.tv/'+video_id]) + urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids] + return self._downloader.extract_info_iterable(urls) class DepositFilesIE(InfoExtractor): From 6ac7f082c469b3b2153735ae8475e1d0fc8b5439 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 5 Mar 2013 20:14:32 +0100 Subject: [PATCH 05/14] `extract_info` now expects `ie.extract` to return a list in the format proposed in issue 608. Each element should have a '_type' key specifying if it's a video, an url or a playlist. `extract_info` will process each element to get the full info --- youtube_dl/FileDownloader.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 9b630c123..68fad11bc 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -410,12 +410,9 @@ class FileDownloader(object): # Extract information from URL and process it try: - videos = ie.extract(url) - for video in videos or []: - if not 'extractor' in video: - #The extractor has already been set somewher else - video['extractor'] = ie.IE_NAME - return videos + ie_results = ie.extract(url) + results = self.process_ie_results(ie_results, ie) + return results except ExtractorError as de: # An error we somewhat expected self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback()) break @@ -435,6 +432,29 @@ class FileDownloader(object): for url in urls: results.extend(self.extract_info(url)) return results + + def process_ie_results(self, ie_results, ie): + """ + Take the results of the ie and return a list of videos. + For url elements it will seartch the suitable ie and get the videos + For playlist elements it will process each of the elements of the 'entries' key + """ + results = [] + for result in ie_results or []: + result_type = result.get('_type', 'video') #If not given we suppose it's a video, support the dafault old system + if result_type == 'video': + if not 'extractor' in result: + #The extractor has already been set somewhere else + result['extractor'] = ie.IE_NAME + results.append(result) + elif result_type == 'url': + #We get the videos pointed by the url + results.extend(self.extract_info(result['url'])) + elif result_type == 'playlist': + #We process each entry in the playlist + entries_result = self.process_ie_results(result['entries'], ie) + results.extend(entries_result) + return results def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" From 8a38a194fb08a253986cdbafa02cf699ef76c9a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 5 Mar 2013 20:55:48 +0100 Subject: [PATCH 06/14] Add auxiliary methods to InfoExtractor to set the '_type' key and use them for some playlist IEs --- test/test_youtube_lists.py | 35 +++++++++++++++++++++-------------- youtube_dl/InfoExtractors.py | 30 ++++++++++++++++++++++++++---- 2 files changed, 47 insertions(+), 18 deletions(-) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 055bf69c8..9e91484f8 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -36,31 +36,37 @@ class FakeDownloader(FileDownloader): return url class TestYoutubeLists(unittest.TestCase): + def assertIsPlaylist(self,info): + """Make sure the info has '_type' set to 'playlist'""" + self.assertEqual(info['_type'], 'playlist') + def test_youtube_playlist(self): dl = FakeDownloader() ie = YoutubePlaylistIE(dl) - ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') - ytie_results = [YoutubeIE()._extract_id(url) for url in dl.result] + result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0] + self.assertIsPlaylist(result) + ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE']) def test_issue_673(self): dl = FakeDownloader() ie = YoutubePlaylistIE(dl) - ie.extract('PLBB231211A4F62143') - self.assertTrue(len(dl.result) > 40) + result = ie.extract('PLBB231211A4F62143')[0] + self.assertTrue(len(result['entries']) > 40) def test_youtube_playlist_long(self): dl = FakeDownloader() ie = YoutubePlaylistIE(dl) - ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') - self.assertTrue(len(dl.result) >= 799) + result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0] + self.assertIsPlaylist(result) + self.assertTrue(len(result['entries']) >= 799) def test_youtube_playlist_with_deleted(self): #651 dl = FakeDownloader() ie = YoutubePlaylistIE(dl) - ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') - ytie_results = [YoutubeIE()._extract_id(url) for url in dl.result] + result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0] + ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] self.assertFalse('pElCt5oNDuI' in ytie_results) self.assertFalse('KdPEApIVdWM' in ytie_results) @@ -68,10 +74,11 @@ class TestYoutubeLists(unittest.TestCase): dl = FakeDownloader() ie = YoutubePlaylistIE(dl) # TODO find a > 100 (paginating?) videos course - ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') - self.assertEqual(YoutubeIE()._extract_id(dl.result[0]), 'j9WZyLZCBzs') - self.assertEqual(len(dl.result), 25) - self.assertEqual(YoutubeIE()._extract_id(dl.result[-1]), 'rYefUsYuEp0') + result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0] + entries = result['entries'] + self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs') + self.assertEqual(len(entries), 25) + self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0') def test_youtube_channel(self): # I give up, please find a channel that does paginate and test this like test_youtube_playlist_long @@ -80,8 +87,8 @@ class TestYoutubeLists(unittest.TestCase): def test_youtube_user(self): dl = FakeDownloader() ie = YoutubeUserIE(dl) - ie.extract('https://www.youtube.com/user/TheLinuxFoundation') - self.assertTrue(len(dl.result) >= 320) + result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0] + self.assertTrue(len(result['entries']) >= 320) if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index d79f6068f..895658f49 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -128,6 +128,24 @@ class InfoExtractor(object): urlh = self._request_webpage(url_or_request, video_id, note, errnote) webpage_bytes = urlh.read() return webpage_bytes.decode('utf-8', 'replace') + + #Methods for following #608 + #They set the correct value of the '_type' key + def video_result(self, video_info): + """Returns a video""" + video_info['_type'] = 'video' + return video_info + def url_result(self, url, ie=None): + """Returns a url that points to a page that should be processed""" + #TODO: ie should be the class used for getting the info + video_info = {'_type': 'url', + 'url': url} + return video_info + def playlist_result(self, entries): + """Returns a playlist""" + video_info = {'_type': 'playlist', + 'entries': entries} + return video_info class YoutubeIE(InfoExtractor): @@ -1756,7 +1774,8 @@ class YoutubePlaylistIE(InfoExtractor): else: self._downloader.to_screen(u'[youtube] PL %s: Found %i videos, downloading %i' % (playlist_id, total, len(videos))) - return self._downloader.extract_info_iterable(videos) + url_results = [self.url_result(url) for url in videos] + return [self.playlist_result(url_results)] class YoutubeChannelIE(InfoExtractor): @@ -1807,7 +1826,8 @@ class YoutubeChannelIE(InfoExtractor): self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids] - return self._downloader.extract_info_iterable(urls) + url_entries = [self.url_result(url) for url in urls] + return [self.playlist_result(url_entries)] class YoutubeUserIE(InfoExtractor): @@ -1890,7 +1910,8 @@ class YoutubeUserIE(InfoExtractor): (username, all_ids_count, len(video_ids))) urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] - return self._downloader.extract_info_iterable(urls) + url_results = [self.url_result(url) for url in urls] + return [self.playlist_result(url_results)] class BlipTVUserIE(InfoExtractor): @@ -1981,7 +2002,8 @@ class BlipTVUserIE(InfoExtractor): (self.IE_NAME, username, all_ids_count, len(video_ids))) urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids] - return self._downloader.extract_info_iterable(urls) + url_entries = [self.url_result(url) for url in urls] + return [self.playlist_result(url_entries)] class DepositFilesIE(InfoExtractor): From a0d6fe7b924697c089ed7ae37df0ca590ac38a96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 5 Mar 2013 22:33:32 +0100 Subject: [PATCH 07/14] When a redirect is found return the new url using the new style --- youtube_dl/InfoExtractors.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 895658f49..e714fa6b0 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -1311,7 +1311,7 @@ class GenericIE(InfoExtractor): self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url) def _test_redirect(self, url): - """Check if it is a redirect, like url shorteners, in case restart chain.""" + """Check if it is a redirect, like url shorteners, in case return the new url.""" class HeadRequest(compat_urllib_request.Request): def get_method(self): return "HEAD" @@ -1362,11 +1362,11 @@ class GenericIE(InfoExtractor): return False self.report_following_redirect(new_url) - self._downloader.download([new_url]) - return True + return new_url def _real_extract(self, url): - if self._test_redirect(url): return + new_url = self._test_redirect(url) + if new_url: return [self.url_result(new_url)] video_id = url.split('/')[-1] request = compat_urllib_request.Request(url) From d2c690828a8297c014d8053fbdee4e26fe11586a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 28 Mar 2013 13:39:00 +0100 Subject: [PATCH 08/14] Add title and id to playlist results Not all IE give both. They are not used yet. --- youtube_dl/InfoExtractors.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index dd4a776e4..6053d14ec 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -147,10 +147,14 @@ class InfoExtractor(object): video_info = {'_type': 'url', 'url': url} return video_info - def playlist_result(self, entries): + def playlist_result(self, entries, playlist_id=None, playlist_title=None): """Returns a playlist""" video_info = {'_type': 'playlist', 'entries': entries} + if playlist_id: + video_info['id'] = playlist_id + if playlist_title: + video_info['title'] = playlist_title return video_info @@ -1808,7 +1812,7 @@ class YoutubePlaylistIE(InfoExtractor): self._downloader.to_screen(u'[youtube] PL %s: Found %i videos, downloading %i' % (playlist_id, total, len(videos))) url_results = [self.url_result(url) for url in videos] - return [self.playlist_result(url_results)] + return [self.playlist_result(url_results, playlist_id)] class YoutubeChannelIE(InfoExtractor): @@ -1860,7 +1864,7 @@ class YoutubeChannelIE(InfoExtractor): urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids] url_entries = [self.url_result(url) for url in urls] - return [self.playlist_result(url_entries)] + return [self.playlist_result(url_entries, channel_id)] class YoutubeUserIE(InfoExtractor): @@ -1944,7 +1948,7 @@ class YoutubeUserIE(InfoExtractor): urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] url_results = [self.url_result(url) for url in urls] - return [self.playlist_result(url_results)] + return [self.playlist_result(url_results, playlist_title = username)] class BlipTVUserIE(InfoExtractor): @@ -2036,7 +2040,7 @@ class BlipTVUserIE(InfoExtractor): urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids] url_entries = [self.url_result(url) for url in urls] - return [self.playlist_result(url_entries)] + return [self.playlist_result(url_entries, playlist_title = username)] class DepositFilesIE(InfoExtractor): From 7eab8dc7504cf1f5f1dd03eb62e266ce24948b93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 29 Mar 2013 12:32:42 +0100 Subject: [PATCH 09/14] Pass the playlist info_dict to process_info the playlist value can be used in the output template --- README.md | 1 + youtube_dl/FileDownloader.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7c09d0c0d..1f3422ef8 100644 --- a/README.md +++ b/README.md @@ -138,6 +138,7 @@ The `-o` option allows users to indicate a template for the output file names. T - `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4). - `epoch`: The sequence will be replaced by the Unix epoch when creating the file. - `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero. + - `playlist`: The name or the id of the playlist that contains the video. The current default template is `%(id)s.%(ext)s`, but that will be switchted to `%(title)s-%(id)s.%(ext)s` (which can be requested with `-t` at the moment). diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 6af2acbee..d2b9be9ef 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -460,12 +460,21 @@ class FileDownloader(object): elif result_type == 'playlist': #We process each entry in the playlist entries_result = self.process_ie_results(result['entries'], ie) - results.extend(entries_result) + result['entries'] = entries_result + results.extend([result]) return results def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" + if info_dict.get('_type','video') == 'playlist': + playlist = info_dict.get('title', None) or info_dict.get('id', None) + self.to_screen(u'[download] Downloading playlist: %s' % playlist) + for video in info_dict['entries']: + video['playlist'] = playlist + self.process_info(video) + return + # Keep for backwards compatibility info_dict['stitle'] = info_dict['title'] From d39919c03e45b3e8f804c23f78fae33cb4adc7df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 5 Apr 2013 13:01:59 +0200 Subject: [PATCH 10/14] Add progress counter for playlists Closes #276 --- youtube_dl/FileDownloader.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 2237d355d..ba3277577 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -492,8 +492,10 @@ class FileDownloader(object): if info_dict.get('_type','video') == 'playlist': playlist = info_dict.get('title', None) or info_dict.get('id', None) self.to_screen(u'[download] Downloading playlist: %s' % playlist) - for video in info_dict['entries']: + n_videos = len(info_dict['entries']) + for i,video in enumerate(info_dict['entries'],1): video['playlist'] = playlist + self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_videos)) self.process_info(video) return From 146c12a2dafdb9ff0e5138aa0f9da38bddca6c8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 10 Apr 2013 00:05:04 +0200 Subject: [PATCH 11/14] Change the order for extracting/downloading Now it gets a video info and directly downloads it, the it pass to the next video founded. --- youtube_dl/FileDownloader.py | 103 ++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 50 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index ba3277577..58be5caee 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -419,9 +419,10 @@ class FileDownloader(object): return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' return None - def extract_info(self, url): + def extract_info(self, url, download = True): ''' Returns a list with a dictionary for each video we find. + If 'download', also downloads the videos. ''' suitable_found = False for ie in self._ies: @@ -440,7 +441,12 @@ class FileDownloader(object): # Extract information from URL and process it try: ie_results = ie.extract(url) - results = self.process_ie_results(ie_results, ie) + results = [] + for ie_result in ie_results: + if not 'extractor' in ie_result: + #The extractor has already been set somewhere else + ie_result['extractor'] = ie.IE_NAME + results.append(self.process_ie_result(ie_result, download)) return results except ExtractorError as de: # An error we somewhat expected self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback()) @@ -453,51 +459,51 @@ class FileDownloader(object): raise if not suitable_found: self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url) - def extract_info_iterable(self, urls): - ''' - Return the videos founded for the urls - ''' - results = [] - for url in urls: - results.extend(self.extract_info(url)) - return results - def process_ie_results(self, ie_results, ie): + def process_ie_result(self, ie_result, download = True): """ - Take the results of the ie and return a list of videos. - For url elements it will seartch the suitable ie and get the videos + Take the result of the ie and return a list of videos. + For url elements it will search the suitable ie and get the videos For playlist elements it will process each of the elements of the 'entries' key + + It will also download the videos if 'download'. """ - results = [] - for result in ie_results or []: - result_type = result.get('_type', 'video') #If not given we suppose it's a video, support the dafault old system - if result_type == 'video': - if not 'extractor' in result: - #The extractor has already been set somewhere else - result['extractor'] = ie.IE_NAME - results.append(result) - elif result_type == 'url': - #We get the videos pointed by the url - results.extend(self.extract_info(result['url'])) - elif result_type == 'playlist': - #We process each entry in the playlist - entries_result = self.process_ie_results(result['entries'], ie) - result['entries'] = entries_result - results.extend([result]) - return results + result_type = ie_result.get('_type', 'video') #If not given we suppose it's a video, support the dafault old system + if result_type == 'video': + if 'playlist' not in ie_result: + #It isn't part of a playlist + ie_result['playlist'] = None + if download: + #Do the download: + self.process_info(ie_result) + return ie_result + elif result_type == 'url': + #We get the video pointed by the url + result = self.extract_info(ie_result['url'], download)[0] + return result + elif result_type == 'playlist': + #We process each entry in the playlist + playlist = ie_result.get('title', None) or ie_result.get('id', None) + self.to_screen(u'[download] Downloading playlist: %s' % playlist) + n_videos = len(ie_result['entries']) + playlist_results = [] + for i,entry in enumerate(ie_result['entries'],1): + self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_videos)) + entry_result = self.process_ie_result(entry, False) + entry_result['playlist'] = playlist + #We must do the download here to correctly set the 'playlist' key + if download: + self.process_info(entry_result) + playlist_results.append(entry_result) + result = ie_result.copy() + result['entries'] = playlist_results + return result def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" - if info_dict.get('_type','video') == 'playlist': - playlist = info_dict.get('title', None) or info_dict.get('id', None) - self.to_screen(u'[download] Downloading playlist: %s' % playlist) - n_videos = len(info_dict['entries']) - for i,video in enumerate(info_dict['entries'],1): - video['playlist'] = playlist - self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_videos)) - self.process_info(video) - return + #We increment the download the download count here to match the previous behaviour. + self.increment_downloads() # Keep for backwards compatibility info_dict['stitle'] = info_dict['title'] @@ -633,17 +639,14 @@ class FileDownloader(object): raise SameFileError(self.params['outtmpl']) for url in url_list: - videos = self.extract_info(url) - - for video in videos or []: - try: - self.increment_downloads() - self.process_info(video) - except UnavailableVideoError: - self.trouble(u'\nERROR: unable to download video') - except MaxDownloadsReached: - self.to_screen(u'[info] Maximum number of downloaded files reached.') - raise + try: + #It also downloads the videos + videos = self.extract_info(url) + except UnavailableVideoError: + self.trouble(u'\nERROR: unable to download video') + except MaxDownloadsReached: + self.to_screen(u'[info] Maximum number of downloaded files reached.') + raise return self._download_retcode From 532d797824a1ec48480f1d10075e66a90aa53449 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 10 Apr 2013 00:06:03 +0200 Subject: [PATCH 12/14] In MetacafeIE return a url if YoutubeIE should do the job --- youtube_dl/InfoExtractors.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 81eaddc72..b7371365a 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -723,8 +723,7 @@ class MetacafeIE(InfoExtractor): # Check if video comes from YouTube mobj2 = re.match(r'^yt-(.*)$', video_id) if mobj2 is not None: - self._downloader.download(['http://www.youtube.com/watch?v=%s' % mobj2.group(1)]) - return + return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1))] # Retrieve video webpage to extract further information request = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id) From bce878a7c1678ac698ecd556b2c77a1e2f2306df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 10 Apr 2013 14:32:03 +0200 Subject: [PATCH 13/14] Implement the playlist/start options in FileDownloader It makes it available for all the InfoExtractors --- youtube_dl/FileDownloader.py | 21 ++++++++++++++++++--- youtube_dl/InfoExtractors.py | 34 ---------------------------------- 2 files changed, 18 insertions(+), 37 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 58be5caee..5a5141ba5 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -485,10 +485,25 @@ class FileDownloader(object): #We process each entry in the playlist playlist = ie_result.get('title', None) or ie_result.get('id', None) self.to_screen(u'[download] Downloading playlist: %s' % playlist) - n_videos = len(ie_result['entries']) + playlist_results = [] - for i,entry in enumerate(ie_result['entries'],1): - self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_videos)) + + n_all_entries = len(ie_result['entries']) + playliststart = self.params.get('playliststart', 1) - 1 + playlistend = self.params.get('playlistend', -1) + + if playlistend == -1: + entries = ie_result['entries'][playliststart:] + else: + entries = ie_result['entries'][playliststart:playlistend] + + n_entries = len(entries) + + self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" % + (ie_result['extractor'], playlist, n_all_entries, n_entries)) + + for i,entry in enumerate(entries,1): + self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries)) entry_result = self.process_ie_result(entry, False) entry_result['playlist'] = playlist #We must do the download here to correctly set the 'playlist' key diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index b7371365a..a7fdf1607 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -1806,19 +1806,6 @@ class YoutubePlaylistIE(InfoExtractor): page_num += 1 videos = [v[1] for v in sorted(videos)] - total = len(videos) - - playliststart = self._downloader.params.get('playliststart', 1) - 1 - playlistend = self._downloader.params.get('playlistend', -1) - if playlistend == -1: - videos = videos[playliststart:] - else: - videos = videos[playliststart:playlistend] - - if len(videos) == total: - self._downloader.to_screen(u'[youtube] PL %s: Found %i videos' % (playlist_id, total)) - else: - self._downloader.to_screen(u'[youtube] PL %s: Found %i videos, downloading %i' % (playlist_id, total, len(videos))) url_results = [self.url_result(url) for url in videos] return [self.playlist_result(url_results, playlist_id)] @@ -1943,18 +1930,6 @@ class YoutubeUserIE(InfoExtractor): pagenum += 1 - all_ids_count = len(video_ids) - playliststart = self._downloader.params.get('playliststart', 1) - 1 - playlistend = self._downloader.params.get('playlistend', -1) - - if playlistend == -1: - video_ids = video_ids[playliststart:] - else: - video_ids = video_ids[playliststart:playlistend] - - self._downloader.to_screen(u"[youtube] user %s: Collected %d video ids (downloading %d of them)" % - (username, all_ids_count, len(video_ids))) - urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] url_results = [self.url_result(url) for url in urls] return [self.playlist_result(url_results, playlist_title = username)] @@ -2035,15 +2010,6 @@ class BlipTVUserIE(InfoExtractor): pagenum += 1 - all_ids_count = len(video_ids) - playliststart = self._downloader.params.get('playliststart', 1) - 1 - playlistend = self._downloader.params.get('playlistend', -1) - - if playlistend == -1: - video_ids = video_ids[playliststart:] - else: - video_ids = video_ids[playliststart:playlistend] - self._downloader.to_screen(u"[%s] user %s: Collected %d video ids (downloading %d of them)" % (self.IE_NAME, username, all_ids_count, len(video_ids))) From d281274bf250065f876bb4f75fb6f711e1a26eba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 16 Apr 2013 15:13:29 +0200 Subject: [PATCH 14/14] Add a playlist_index key to the info_dict, can be used in the output template --- README.md | 1 + youtube_dl/FileDownloader.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/README.md b/README.md index c8d28db3c..e2958a9b0 100644 --- a/README.md +++ b/README.md @@ -145,6 +145,7 @@ The `-o` option allows users to indicate a template for the output file names. T - `epoch`: The sequence will be replaced by the Unix epoch when creating the file. - `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero. - `playlist`: The name or the id of the playlist that contains the video. + - `playlist_index`: The index of the video in the playlist, a five-digit number. The current default template is `%(id)s.%(ext)s`, but that will be switchted to `%(title)s-%(id)s.%(ext)s` (which can be requested with `-t` at the moment). diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 5a5141ba5..4dabbb440 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -389,6 +389,8 @@ class FileDownloader(object): template_dict['epoch'] = int(time.time()) template_dict['autonumber'] = u'%05d' % self._num_downloads + if template_dict['playlist_index'] is not None: + template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index'] sanitize = lambda k,v: sanitize_filename( u'NA' if v is None else compat_str(v), @@ -473,6 +475,7 @@ class FileDownloader(object): if 'playlist' not in ie_result: #It isn't part of a playlist ie_result['playlist'] = None + ie_result['playlist_index'] = None if download: #Do the download: self.process_info(ie_result) @@ -506,6 +509,7 @@ class FileDownloader(object): self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries)) entry_result = self.process_ie_result(entry, False) entry_result['playlist'] = playlist + entry_result['playlist_index'] = i + playliststart #We must do the download here to correctly set the 'playlist' key if download: self.process_info(entry_result)