From 83de7942237b8294df47f318133fc5d7c260d496 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 12 Sep 2013 16:30:43 +0200 Subject: [PATCH 001/103] Add original buildserver from @fraca7 --- devscripts/buildserver.py | 272 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100644 devscripts/buildserver.py diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py new file mode 100644 index 000000000..4fe80edce --- /dev/null +++ b/devscripts/buildserver.py @@ -0,0 +1,272 @@ +#!/usr/bin/python + +## This is free and unencumbered software released into the public domain. + +## Anyone is free to copy, modify, publish, use, compile, sell, or +## distribute this software, either in source code form or as a compiled +## binary, for any purpose, commercial or non-commercial, and by any +## means. + +## In jurisdictions that recognize copyright laws, the author or authors +## of this software dedicate any and all copyright interest in the +## software to the public domain. We make this dedication for the benefit +## of the public at large and to the detriment of our heirs and +## successors. We intend this dedication to be an overt act of +## relinquishment in perpetuity of all present and future rights to this +## software under copyright law. + +## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +## EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +## MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +## IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +## OTHER DEALINGS IN THE SOFTWARE. + +## For more information, please refer to + +from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler +from SocketServer import ThreadingMixIn +import getopt, threading, sys, urlparse, _winreg, os, subprocess, shutil, tempfile + + +class BuildHTTPServer(ThreadingMixIn, HTTPServer): + allow_reuse_address = True + + +def usage(): + print 'Usage: %s [options]' + print 'Options:' + print + print ' -h, --help Display this help' + print ' -i, --install Launch at session startup' + print ' -u, --uninstall Do not launch at session startup' + print ' -b, --bind Bind to host:port (default localhost:8142)' + sys.exit(0) + + +def main(argv): + opts, args = getopt.getopt(argv, 'hb:iu', ['help', 'bind=', 'install', 'uninstall']) + host = 'localhost' + port = 8142 + + for opt, val in opts: + if opt in ['-h', '--help']: + usage() + elif opt in ['-b', '--bind']: + try: + host, port = val.split(':') + except ValueError: + host = val + else: + port = int(port) + elif opt in ['-i', '--install']: + key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, r'Software\Microsoft\Windows\CurrentVersion\Run', 0, _winreg.KEY_WRITE) + try: + _winreg.SetValueEx(key, 'Youtube-dl builder', 0, _winreg.REG_SZ, + '"%s" "%s" -b %s:%d' % (sys.executable, os.path.normpath(os.path.abspath(sys.argv[0])), + host, port)) + finally: + _winreg.CloseKey(key) + print 'Installed.' + sys.exit(0) + elif opt in ['-u', '--uninstall']: + key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, r'Software\Microsoft\Windows\CurrentVersion\Run', 0, _winreg.KEY_WRITE) + try: + _winreg.DeleteValue(key, 'Youtube-dl builder') + finally: + _winreg.CloseKey(key) + print 'Uninstalled.' + sys.exit(0) + + print 'Listening on %s:%d' % (host, port) + srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler) + thr = threading.Thread(target=srv.serve_forever) + thr.start() + raw_input('Hit to stop...\n') + srv.shutdown() + thr.join() + + +def rmtree(path): + for name in os.listdir(path): + fname = os.path.join(path, name) + if os.path.isdir(fname): + rmtree(fname) + else: + os.chmod(fname, 0666) + os.remove(fname) + os.rmdir(path) + +#============================================================================== + +class BuildError(Exception): + def __init__(self, output, code=500): + self.output = output + self.code = code + + def __str__(self): + return self.output + + +class HTTPError(BuildError): + pass + + +class PythonBuilder(object): + def __init__(self, **kwargs): + pythonVersion = kwargs.pop('python', '2.7') + try: + key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion) + try: + self.pythonPath, _ = _winreg.QueryValueEx(key, '') + finally: + _winreg.CloseKey(key) + except Exception: + raise BuildError('No such Python version: %s' % pythonVersion) + + super(PythonBuilder, self).__init__(**kwargs) + + +class GITInfoBuilder(object): + def __init__(self, **kwargs): + try: + self.user, self.repoName = kwargs['path'][:2] + self.rev = kwargs.pop('rev') + except ValueError: + raise BuildError('Invalid path') + except KeyError as e: + raise BuildError('Missing mandatory parameter "%s"' % e.args[0]) + + path = os.path.join(os.environ['APPDATA'], 'Build archive', self.repoName, self.user) + if not os.path.exists(path): + os.makedirs(path) + self.basePath = tempfile.mkdtemp(dir=path) + self.buildPath = os.path.join(self.basePath, 'build') + + super(GITInfoBuilder, self).__init__(**kwargs) + + +class GITBuilder(GITInfoBuilder): + def build(self): + try: + subprocess.check_output(['git', 'clone', 'git://github.com/%s/%s.git' % (self.user, self.repoName), self.buildPath]) + subprocess.check_output(['git', 'checkout', self.rev], cwd=self.buildPath) + except subprocess.CalledProcessError as e: + raise BuildError(e.output) + + super(GITBuilder, self).build() + + +class YoutubeDLBuilder(object): + authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile'] + + def __init__(self, **kwargs): + if self.repoName != 'youtube-dl': + raise BuildError('Invalid repository "%s"' % self.repoName) + if self.user not in self.authorizedUsers: + raise HTTPError('Unauthorized user "%s"' % self.user, 401) + + super(YoutubeDLBuilder, self).__init__(**kwargs) + + def build(self): + try: + subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], + cwd=self.buildPath) + except subprocess.CalledProcessError as e: + raise BuildError(e.output) + + super(YoutubeDLBuilder, self).build() + + +class DownloadBuilder(object): + def __init__(self, **kwargs): + self.handler = kwargs.pop('handler') + self.srcPath = os.path.join(self.buildPath, *tuple(kwargs['path'][2:])) + self.srcPath = os.path.abspath(os.path.normpath(self.srcPath)) + if not self.srcPath.startswith(self.buildPath): + raise HTTPError(self.srcPath, 401) + + super(DownloadBuilder, self).__init__(**kwargs) + + def build(self): + if not os.path.exists(self.srcPath): + raise HTTPError('No such file', 404) + if os.path.isdir(self.srcPath): + raise HTTPError('Is a directory: %s' % self.srcPath, 401) + + self.handler.send_response(200) + self.handler.send_header('Content-Type', 'application/octet-stream') + self.handler.send_header('Content-Disposition', 'attachment; filename=%s' % os.path.split(self.srcPath)[-1]) + self.handler.send_header('Content-Length', str(os.stat(self.srcPath).st_size)) + self.handler.end_headers() + + with open(self.srcPath, 'rb') as src: + shutil.copyfileobj(src, self.handler.wfile) + + super(DownloadBuilder, self).build() + + +class CleanupTempDir(object): + def build(self): + try: + rmtree(self.basePath) + except Exception as e: + print 'WARNING deleting "%s": %s' % (self.basePath, e) + + super(CleanupTempDir, self).build() + + +class Null(object): + def __init__(self, **kwargs): + pass + + def start(self): + pass + + def close(self): + pass + + def build(self): + pass + + +class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, CleanupTempDir, Null): + pass + + +class BuildHTTPRequestHandler(BaseHTTPRequestHandler): + actionDict = { 'build': Builder, 'download': Builder } # They're the same, no more caching. + + def do_GET(self): + path = urlparse.urlparse(self.path) + paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()]) + action, _, path = path.path.strip('/').partition('/') + if path: + path = path.split('/') + if action in self.actionDict: + try: + builder = self.actionDict[action](path=path, handler=self, **paramDict) + builder.start() + try: + builder.build() + finally: + builder.close() + except BuildError as e: + self.send_response(e.code) + msg = unicode(e).encode('UTF-8') + self.send_header('Content-Type', 'text/plain; charset=UTF-8') + self.send_header('Content-Length', len(msg)) + self.end_headers() + self.wfile.write(msg) + except HTTPError as e: + self.send_response(e.code, str(e)) + else: + self.send_response(500, 'Unknown build method "%s"' % action) + else: + self.send_response(500, 'Malformed URL') + +#============================================================================== + +if __name__ == '__main__': + main(sys.argv[1:]) From 353ba14060528d981213c66131bc770f478935de Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 12 Sep 2013 16:34:24 +0200 Subject: [PATCH 002/103] [buildserver] Rely on repository license --- devscripts/buildserver.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py index 4fe80edce..edc437dee 100644 --- a/devscripts/buildserver.py +++ b/devscripts/buildserver.py @@ -1,30 +1,5 @@ #!/usr/bin/python -## This is free and unencumbered software released into the public domain. - -## Anyone is free to copy, modify, publish, use, compile, sell, or -## distribute this software, either in source code form or as a compiled -## binary, for any purpose, commercial or non-commercial, and by any -## means. - -## In jurisdictions that recognize copyright laws, the author or authors -## of this software dedicate any and all copyright interest in the -## software to the public domain. We make this dedication for the benefit -## of the public at large and to the detriment of our heirs and -## successors. We intend this dedication to be an overt act of -## relinquishment in perpetuity of all present and future rights to this -## software under copyright law. - -## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -## EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -## MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -## IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -## OTHER DEALINGS IN THE SOFTWARE. - -## For more information, please refer to - from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler from SocketServer import ThreadingMixIn import getopt, threading, sys, urlparse, _winreg, os, subprocess, shutil, tempfile From 71cedb3c0c6860c1cf06792a8e98940f88b34a17 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 13 Sep 2013 02:25:12 +0200 Subject: [PATCH 003/103] [buildserver] Service installation and uninstallation --- devscripts/buildserver.py | 145 +++++++++++++++++++++++++------------- 1 file changed, 95 insertions(+), 50 deletions(-) diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py index edc437dee..45c875b23 100644 --- a/devscripts/buildserver.py +++ b/devscripts/buildserver.py @@ -1,64 +1,109 @@ -#!/usr/bin/python +#!/usr/bin/python3 -from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler -from SocketServer import ThreadingMixIn -import getopt, threading, sys, urlparse, _winreg, os, subprocess, shutil, tempfile +from http.server import HTTPServer, BaseHTTPRequestHandler +from socketserver import ThreadingMixIn +import argparse +import ctypes +import sys +import threading +import os.path class BuildHTTPServer(ThreadingMixIn, HTTPServer): allow_reuse_address = True -def usage(): - print 'Usage: %s [options]' - print 'Options:' - print - print ' -h, --help Display this help' - print ' -i, --install Launch at session startup' - print ' -u, --uninstall Do not launch at session startup' - print ' -b, --bind Bind to host:port (default localhost:8142)' - sys.exit(0) +advapi32 = ctypes.windll.advapi32 +SC_MANAGER_ALL_ACCESS = 0xf003f +SC_MANAGER_CREATE_SERVICE = 0x02 +SERVICE_WIN32_OWN_PROCESS = 0x10 +SERVICE_AUTO_START = 0x2 +SERVICE_ERROR_NORMAL = 0x1 +DELETE = 0x00010000 -def main(argv): - opts, args = getopt.getopt(argv, 'hb:iu', ['help', 'bind=', 'install', 'uninstall']) - host = 'localhost' - port = 8142 - - for opt, val in opts: - if opt in ['-h', '--help']: - usage() - elif opt in ['-b', '--bind']: - try: - host, port = val.split(':') - except ValueError: - host = val - else: - port = int(port) - elif opt in ['-i', '--install']: - key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, r'Software\Microsoft\Windows\CurrentVersion\Run', 0, _winreg.KEY_WRITE) - try: - _winreg.SetValueEx(key, 'Youtube-dl builder', 0, _winreg.REG_SZ, - '"%s" "%s" -b %s:%d' % (sys.executable, os.path.normpath(os.path.abspath(sys.argv[0])), - host, port)) - finally: - _winreg.CloseKey(key) - print 'Installed.' - sys.exit(0) - elif opt in ['-u', '--uninstall']: - key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, r'Software\Microsoft\Windows\CurrentVersion\Run', 0, _winreg.KEY_WRITE) - try: - _winreg.DeleteValue(key, 'Youtube-dl builder') - finally: - _winreg.CloseKey(key) - print 'Uninstalled.' - sys.exit(0) - print 'Listening on %s:%d' % (host, port) +def win_OpenSCManager(): + res = advapi32.OpenSCManagerA(None, None, SC_MANAGER_ALL_ACCESS) + if not res: + raise Exception('Opening service manager failed - ' + 'are you running this as administrator?') + return res + + +def win_install_service(service_name, cmdline): + manager = win_OpenSCManager() + try: + h = advapi32.CreateServiceA( + manager, service_name, None, + SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS, + SERVICE_AUTO_START, SERVICE_ERROR_NORMAL, + cmdline, None, None, None, None, None) + if not h: + raise OSError('Service creation failed: %s' % ctypes.FormatError()) + + advapi32.CloseServiceHandle(h) + finally: + advapi32.CloseServiceHandle(manager) + + +def win_uninstall_service(service_name): + manager = win_OpenSCManager() + try: + h = advapi32.OpenServiceA(manager, service_name, DELETE) + if not h: + raise OSError('Could not find service %s: %s' % ( + service_name, ctypes.FormatError())) + + try: + if not advapi32.DeleteService(h): + raise OSError('Deletion failed: %s' % ctypes.FormatError()) + finally: + advapi32.CloseServiceHandle(h) + finally: + advapi32.CloseServiceHandle(manager) + + +def install_service(bind): + fn = os.path.normpath(__file__) + cmdline = '"%s" "%s" -s -b "%s"' % (sys.executable, fn, bind) + win_install_service('youtubedl_builder', cmdline) + + +def uninstall_service(): + win_uninstall_service('youtubedl_builder') + + +def main(argv): + parser = argparse.ArgumentParser() + parser.add_argument('-i', '--install', + action='store_const', dest='action', const='install', + help='Launch at Windows startup') + parser.add_argument('-u', '--uninstall', + action='store_const', dest='action', const='uninstall', + help='Remove Windows service') + parser.add_argument('-s', '--service', + action='store_const', dest='action', const='servce', + help='Run as a Windows service') + parser.add_argument('-b', '--bind', metavar='', + action='store', default='localhost:8142', + help='Bind to host:port (default %default)') + options = parser.parse_args() + + if options.action == 'install': + return install_service(options.bind) + + if options.action == 'uninstall': + return uninstall_service() + + host, port_str = options.bind.split(':') + port = int(port_str) + + print('Listening on %s:%d' % (host, port)) srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler) thr = threading.Thread(target=srv.serve_forever) thr.start() - raw_input('Hit to stop...\n') + input('Press ENTER to shut down') srv.shutdown() thr.join() @@ -69,7 +114,7 @@ def rmtree(path): if os.path.isdir(fname): rmtree(fname) else: - os.chmod(fname, 0666) + os.chmod(fname, 0o666) os.remove(fname) os.rmdir(path) @@ -187,7 +232,7 @@ class CleanupTempDir(object): try: rmtree(self.basePath) except Exception as e: - print 'WARNING deleting "%s": %s' % (self.basePath, e) + print('WARNING deleting "%s": %s' % (self.basePath, e)) super(CleanupTempDir, self).build() From 9a1c32dc54fdefcd6b5e03fac1a0dd65383b6f99 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Sat, 14 Sep 2013 05:42:00 +0200 Subject: [PATCH 004/103] XHamsterIE: Add support for new URL format --- youtube_dl/extractor/xhamster.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 88b8b6be0..e50069586 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -11,7 +11,7 @@ from ..utils import ( class XHamsterIE(InfoExtractor): """Information Extractor for xHamster""" - _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P[0-9]+)/.*\.html' + _VALID_URL = r'(?:http://)?(?P(?:www\.)?xhamster\.com/movies/(?P[0-9]+)/.*\.html(?:\?.*)?)' _TEST = { u'url': u'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', u'file': u'1509445.flv', @@ -27,7 +27,7 @@ class XHamsterIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id + mrss_url = 'http://' + mobj.group('url') webpage = self._download_webpage(mrss_url, video_id) mobj = re.search(r'\'srv\': \'(?P[^\']*)\',\s*\'file\': \'(?P[^\']+)\',', webpage) From fad84d50fe124df1c620c9bc95bdc4c9e5053e6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 14 Sep 2013 11:10:01 +0200 Subject: [PATCH 005/103] [googleplus] Fix upload date extraction --- youtube_dl/extractor/googleplus.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index f1cd88983..8895ad289 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -40,7 +40,8 @@ class GooglePlusIE(InfoExtractor): self.report_extraction(video_id) # Extract update date - upload_date = self._html_search_regex('title="Timestamp">(.*?)', + upload_date = self._html_search_regex( + ['title="Timestamp">(.*?)', r'(.+?)'], webpage, u'upload date', fatal=False) if upload_date: # Convert timestring to a format suitable for filename From 0b7f31184d6a2d87cf7f568c561ff8d017f07bd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 14 Sep 2013 11:14:40 +0200 Subject: [PATCH 006/103] Now --all-sub is a modifier to --write-sub and --write-auto-sub (closes #1412) For keeping backwards compatibility --all-sub sets --write-sub if --write-auto-sub is not given --- test/test_dailymotion_subtitles.py | 2 ++ test/test_youtube_subtitles.py | 2 ++ youtube_dl/YoutubeDL.py | 4 ++-- youtube_dl/__init__.py | 5 +++++ youtube_dl/extractor/subtitles.py | 5 ++--- 5 files changed, 13 insertions(+), 5 deletions(-) diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py index bcd9f79f6..83c65d57e 100644 --- a/test/test_dailymotion_subtitles.py +++ b/test/test_dailymotion_subtitles.py @@ -40,6 +40,7 @@ class TestDailymotionSubtitles(unittest.TestCase): subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792') def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles.keys()), 5) @@ -54,6 +55,7 @@ class TestDailymotionSubtitles(unittest.TestCase): self.assertTrue(len(subtitles.keys()) == 0) def test_nosubtitles(self): self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv' + self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles), 0) diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index 5632871ac..168e6c66c 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -41,6 +41,7 @@ class TestYoutubeSubtitles(unittest.TestCase): subtitles = self.getSubtitles() self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d') def test_youtube_allsubtitles(self): + self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles.keys()), 13) @@ -66,6 +67,7 @@ class TestYoutubeSubtitles(unittest.TestCase): self.assertTrue(subtitles['it'] is not None) def test_youtube_nosubtitles(self): self.url = 'sAjKT8FhjI8' + self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles), 0) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c2f992b8e..e53a2b8ad 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -74,6 +74,7 @@ class YoutubeDL(object): writesubtitles: Write the video subtitles to a file writeautomaticsub: Write the automatic subtitles to a file allsubtitles: Downloads all the subtitles of the video + (requires writesubtitles or writeautomaticsub) listsubtitles: Lists all available subtitles for the video subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt) subtitleslangs: List of languages of the subtitles to download @@ -499,8 +500,7 @@ class YoutubeDL(object): return subtitles_are_requested = any([self.params.get('writesubtitles', False), - self.params.get('writeautomaticsub'), - self.params.get('allsubtitles', False)]) + self.params.get('writeautomaticsub')]) if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']: # subtitles download errors are already managed as troubles in relevant IE diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 696e54f49..0022a4e7a 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -533,6 +533,11 @@ def _real_main(argv=None): else: date = DateRange(opts.dateafter, opts.datebefore) + # --all-sub automatically sets --write-sub if --write-auto-sub is not given + # this was the old behaviour if only --all-sub was given. + if opts.allsubtitles and (opts.writeautomaticsub == False): + opts.writesubtitles = True + if sys.version_info < (3,): # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) if opts.outtmpl is not None: diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py index 97215f289..90de7de3a 100644 --- a/youtube_dl/extractor/subtitles.py +++ b/youtube_dl/extractor/subtitles.py @@ -10,8 +10,7 @@ class SubtitlesInfoExtractor(InfoExtractor): @property def _have_to_download_any_subtitles(self): return any([self._downloader.params.get('writesubtitles', False), - self._downloader.params.get('writeautomaticsub'), - self._downloader.params.get('allsubtitles', False)]) + self._downloader.params.get('writeautomaticsub')]) def _list_available_subtitles(self, video_id, webpage=None): """ outputs the available subtitles for the video """ @@ -34,7 +33,7 @@ class SubtitlesInfoExtractor(InfoExtractor): available_subs_list = {} if self._downloader.params.get('writeautomaticsub', False): available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage)) - if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False): + if self._downloader.params.get('writesubtitles', False): available_subs_list.update(self._get_available_subtitles(video_id)) if not available_subs_list: # error, it didn't get the available subtitles From 19e1d35989970831007b7ca5d988fe0454f08a1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 14 Sep 2013 14:26:42 +0200 Subject: [PATCH 007/103] [mixcloud] Rewrite extractor (fixes #278) --- youtube_dl/extractor/mixcloud.py | 122 ++++++++++--------------------- youtube_dl/utils.py | 11 ++- 2 files changed, 48 insertions(+), 85 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 8245b5583..a200dcd74 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -5,34 +5,27 @@ import socket from .common import InfoExtractor from ..utils import ( compat_http_client, - compat_str, compat_urllib_error, compat_urllib_request, - - ExtractorError, + unified_strdate, ) class MixcloudIE(InfoExtractor): - _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/ _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)' IE_NAME = u'mixcloud' - def report_download_json(self, file_id): - """Report JSON download.""" - self.to_screen(u'Downloading json') - - def get_urls(self, jsonData, fmt, bitrate='best'): - """Get urls from 'audio_formats' section in json""" - try: - bitrate_list = jsonData[fmt] - if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list: - bitrate = max(bitrate_list) # select highest - - url_list = jsonData[fmt][bitrate] - except TypeError: # we have no bitrate info. - url_list = jsonData[fmt] - return url_list + _TEST = { + u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/', + u'file': u'dholbach-cryptkeeper.mp3', + u'info_dict': { + u'title': u'Cryptkeeper', + u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', + u'uploader': u'Daniel Holbach', + u'uploader_id': u'dholbach', + u'upload_date': u'20111115', + }, + } def check_urls(self, url_list): """Returns 1st active url from list""" @@ -45,71 +38,32 @@ class MixcloudIE(InfoExtractor): return None - def _print_formats(self, formats): - print('Available formats:') - for fmt in formats.keys(): - for b in formats[fmt]: - try: - ext = formats[fmt][b][0] - print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])) - except TypeError: # we have no bitrate info - ext = formats[fmt][0] - print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])) - break - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) - # extract uploader & filename from url - uploader = mobj.group(1).decode('utf-8') - file_id = uploader + "-" + mobj.group(2).decode('utf-8') - - # construct API request - file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json' - # retrieve .json file with links to files - request = compat_urllib_request.Request(file_url) - try: - self.report_download_json(file_url) - jsonData = compat_urllib_request.urlopen(request).read() - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err)) - - # parse JSON - json_data = json.loads(jsonData) - player_url = json_data['player_swf_url'] - formats = dict(json_data['audio_formats']) - - req_format = self._downloader.params.get('format', None) - - if self._downloader.params.get('listformats', None): - self._print_formats(formats) - return - - if req_format is None or req_format == 'best': - for format_param in formats.keys(): - url_list = self.get_urls(formats, format_param) - # check urls - file_url = self.check_urls(url_list) - if file_url is not None: - break # got it! - else: - if req_format not in formats: - raise ExtractorError(u'Format is not available') - - url_list = self.get_urls(formats, req_format) - file_url = self.check_urls(url_list) - format_param = req_format - return [{ - 'id': file_id.decode('utf-8'), - 'url': file_url.decode('utf-8'), - 'uploader': uploader.decode('utf-8'), - 'upload_date': None, - 'title': json_data['name'], - 'ext': file_url.split('.')[-1].decode('utf-8'), - 'format': (format_param is None and u'NA' or format_param.decode('utf-8')), - 'thumbnail': json_data['thumbnail_url'], - 'description': json_data['description'], - 'player_url': player_url.decode('utf-8'), - }] + uploader = mobj.group(1) + cloudcast_name = mobj.group(2) + track_id = '-'.join((uploader, cloudcast_name)) + api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name) + webpage = self._download_webpage(url, track_id) + json_data = self._download_webpage(api_url, track_id, + u'Downloading cloudcast info') + info = json.loads(json_data) + + preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url') + song_url = preview_url.replace('/previews/', '/cloudcasts/originals/') + template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) + final_song_url = self.check_urls(template_url % i for i in range(30)) + + return { + 'id': track_id, + 'title': info['name'], + 'url': final_song_url, + 'ext': 'mp3', + 'description': info['description'], + 'thumbnail': info['pictures'].get('extra_large'), + 'uploader': info['user']['name'], + 'uploader_id': info['user']['username'], + 'upload_date': unified_strdate(info['created_time']), + 'view_count': info['play_count'], + } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 768c6207d..5558d4737 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -700,7 +700,16 @@ def unified_strdate(date_str): date_str = date_str.replace(',',' ') # %z (UTC offset) is only supported in python>=3.2 date_str = re.sub(r' (\+|-)[\d]*$', '', date_str) - format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M'] + format_expressions = [ + '%d %B %Y', + '%B %d %Y', + '%b %d %Y', + '%Y-%m-%d', + '%d/%m/%Y', + '%Y/%m/%d %H:%M:%S', + '%d.%m.%Y %H:%M', + '%Y-%m-%dT%H:%M:%SZ', + ] for expression in format_expressions: try: upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') From 471a5ee908ee765c1ba1ff6a41051bcf71065064 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 14 Sep 2013 14:45:04 +0200 Subject: [PATCH 008/103] Set the ext field for each format --- youtube_dl/extractor/archiveorg.py | 7 ++++--- youtube_dl/extractor/dreisat.py | 6 +++--- youtube_dl/extractor/trilulilu.py | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index 7efd1d823..61ce4469a 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -46,6 +46,8 @@ class ArchiveOrgIE(InfoExtractor): for fn,fdata in data['files'].items() if 'Video' in fdata['format']] formats.sort(key=lambda fdata: fdata['file_size']) + for f in formats: + f['ext'] = determine_ext(f['url']) info = { '_type': 'video', @@ -61,7 +63,6 @@ class ArchiveOrgIE(InfoExtractor): info['thumbnail'] = thumbnail # TODO: Remove when #980 has been merged - info['url'] = formats[-1]['url'] - info['ext'] = determine_ext(formats[-1]['url']) + info.update(formats[-1]) - return info \ No newline at end of file + return info diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 64b465805..765cb1f37 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -54,6 +54,7 @@ class DreiSatIE(InfoExtractor): 'width': int(fe.find('./width').text), 'height': int(fe.find('./height').text), 'url': fe.find('./url').text, + 'ext': determine_ext(fe.find('./url').text), 'filesize': int(fe.find('./filesize').text), 'video_bitrate': int(fe.find('./videoBitrate').text), '3sat_qualityname': fe.find('./quality').text, @@ -79,7 +80,6 @@ class DreiSatIE(InfoExtractor): } # TODO: Remove when #980 has been merged - info['url'] = formats[-1]['url'] - info['ext'] = determine_ext(formats[-1]['url']) + info.update(formats[-1]) - return info \ No newline at end of file + return info diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dl/extractor/trilulilu.py index f278951ba..0bf028f61 100644 --- a/youtube_dl/extractor/trilulilu.py +++ b/youtube_dl/extractor/trilulilu.py @@ -52,6 +52,7 @@ class TriluliluIE(InfoExtractor): { 'format': fnode.text, 'url': video_url_template % fnode.text, + 'ext': fnode.text.partition('-')[0] } for fnode in format_doc.findall('./formats/format') @@ -67,7 +68,6 @@ class TriluliluIE(InfoExtractor): } # TODO: Remove when #980 has been merged - info['url'] = formats[-1]['url'] - info['ext'] = formats[-1]['format'].partition('-')[0] + info.update(formats[-1]) return info From 92790f4e542fc3d5f4cc02a647a2695d9175d464 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 14 Sep 2013 21:41:49 +0200 Subject: [PATCH 009/103] [soundcloud] Add an extractor for users (closes #1426) --- test/test_playlists.py | 10 ++++++- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/soundcloud.py | 45 ++++++++++++++++++++++++++++-- 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index 4a2e00b01..d079a4f23 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -8,7 +8,7 @@ import json import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE +from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE, SoundcloudUserIE from youtube_dl.utils import * from helper import FakeYDL @@ -42,5 +42,13 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['id'], u'5124905') self.assertTrue(len(result['entries']) >= 11) + def test_soundcloud_user(self): + dl = FakeYDL() + ie = SoundcloudUserIE(dl) + result = ie.extract('https://soundcloud.com/the-concept-band') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], u'9615865') + self.assertTrue(len(result['entries']) >= 12) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 06f9542d2..19d57c2e9 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -82,7 +82,7 @@ from .sina import SinaIE from .slashdot import SlashdotIE from .slideshare import SlideshareIE from .sohu import SohuIE -from .soundcloud import SoundcloudIE, SoundcloudSetIE +from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE from .spiegel import SpiegelIE from .stanfordoc import StanfordOpenClassroomIE from .statigram import StatigramIE diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 5f3a5540d..29cd5617c 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -1,10 +1,12 @@ import json import re +import itertools from .common import InfoExtractor from ..utils import ( compat_str, compat_urlparse, + compat_urllib_parse, ExtractorError, unified_strdate, @@ -53,10 +55,11 @@ class SoundcloudIE(InfoExtractor): def _resolv_url(cls, url): return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID - def _extract_info_dict(self, info, full_title=None): + def _extract_info_dict(self, info, full_title=None, quiet=False): video_id = info['id'] name = full_title or video_id - self.report_extraction(name) + if quiet == False: + self.report_extraction(name) thumbnail = info['artwork_url'] if thumbnail is not None: @@ -198,3 +201,41 @@ class SoundcloudSetIE(SoundcloudIE): 'id': info['id'], 'title': info['title'], } + + +class SoundcloudUserIE(SoundcloudIE): + _VALID_URL = r'https?://(www\.)?soundcloud.com/(?P[^/]+)(/?(tracks/)?)?(\?.*)?$' + IE_NAME = u'soundcloud:user' + + # it's in tests/test_playlists.py + _TEST = None + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + uploader = mobj.group('user') + + url = 'http://soundcloud.com/%s/' % uploader + resolv_url = self._resolv_url(url) + user_json = self._download_webpage(resolv_url, uploader, + u'Downloading user info') + user = json.loads(user_json) + + tracks = [] + for i in itertools.count(): + data = compat_urllib_parse.urlencode({'offset': i*50, + 'client_id': self._CLIENT_ID, + }) + tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data + response = self._download_webpage(tracks_url, uploader, + u'Downloading tracks page %s' % (i+1)) + new_tracks = json.loads(response) + tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks) + if len(new_tracks) < 50: + break + + return { + '_type': 'playlist', + 'id': compat_str(user['id']), + 'title': user['username'], + 'entries': tracks, + } From e69ae5b9e74910541e75eea4c8dfc13066f28f65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 15 Sep 2013 12:14:59 +0200 Subject: [PATCH 010/103] [youtube] support youtube.googleapis.com/v/* urls (fixes #1425) --- test/test_all_urls.py | 1 + youtube_dl/extractor/youtube.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 99fc7bd28..ff1c86efe 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -36,6 +36,7 @@ class TestAllURLsMatching(unittest.TestCase): self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668 self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube']) self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) + self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube']) def test_youtube_channel_matching(self): assertChannel = lambda url: self.assertMatch(url, ['youtube:channel']) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f49665925..e4a2e22bc 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -139,7 +139,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): ( (?:https?://)? # http(s):// (optional) (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/| - tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains + tube\.majestyc\.net/| + youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls (?: # the various things that can precede the ID: (?:(?:v|embed|e)/) # v/ or embed/ or e/ From 5a6fecc3dee35f95f3590a31e51670819db5a1fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 15 Sep 2013 23:30:58 +0200 Subject: [PATCH 011/103] Add an extractor for southparkstudios.com (closes #1434) It uses the MTV system --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/southparkstudios.py | 34 ++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 youtube_dl/extractor/southparkstudios.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 19d57c2e9..246f1e8b5 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -83,6 +83,7 @@ from .slashdot import SlashdotIE from .slideshare import SlideshareIE from .sohu import SohuIE from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE +from .southparkstudios import SouthParkStudiosIE from .spiegel import SpiegelIE from .stanfordoc import StanfordOpenClassroomIE from .statigram import StatigramIE diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py new file mode 100644 index 000000000..a5dc754dd --- /dev/null +++ b/youtube_dl/extractor/southparkstudios.py @@ -0,0 +1,34 @@ +import re + +from .mtv import MTVIE, _media_xml_tag + + +class SouthParkStudiosIE(MTVIE): + IE_NAME = u'southparkstudios.com' + _VALID_URL = r'https?://www\.southparkstudios\.com/clips/(?P\d+)' + + _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' + + _TEST = { + u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', + u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4', + u'info_dict': { + u'title': u'Bat Daded', + u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.', + }, + } + + # Overwrite MTVIE properties we don't want + _TESTS = [] + + def _get_thumbnail_url(self, uri, itemdoc): + search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) + return itemdoc.find(search_path).attrib['url'] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"', + webpage, u'mgid') + return self._get_videos_info(mgid) From 22b50ecb2f7f9e0469d281a4c401d4a531c1cc5b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 16 Sep 2013 03:32:45 +0200 Subject: [PATCH 012/103] Starts of a Windows service --- devscripts/buildserver.py | 143 ++++++++++++++++++++++++++++++++++---- 1 file changed, 128 insertions(+), 15 deletions(-) diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py index 45c875b23..e0c3cc83e 100644 --- a/devscripts/buildserver.py +++ b/devscripts/buildserver.py @@ -4,8 +4,10 @@ from http.server import HTTPServer, BaseHTTPRequestHandler from socketserver import ThreadingMixIn import argparse import ctypes +import functools import sys import threading +import traceback import os.path @@ -21,10 +23,40 @@ SERVICE_WIN32_OWN_PROCESS = 0x10 SERVICE_AUTO_START = 0x2 SERVICE_ERROR_NORMAL = 0x1 DELETE = 0x00010000 +SERVICE_STATUS_START_PENDING = 0x00000002 +SERVICE_STATUS_RUNNING = 0x00000004 +SERVICE_ACCEPT_STOP = 0x1 + +SVCNAME = 'youtubedl_builder' + +LPTSTR = ctypes.c_wchar_p +START_CALLBACK = ctypes.WINFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(LPTSTR)) + + +class SERVICE_TABLE_ENTRY(ctypes.Structure): + _fields_ = [ + ('lpServiceName', LPTSTR), + ('lpServiceProc', START_CALLBACK) + ] + + +HandlerEx = ctypes.WINFUNCTYPE( + ctypes.c_int, # return + ctypes.c_int, # dwControl + ctypes.c_int, # dwEventType + ctypes.c_void_p, # lpEventData, + ctypes.c_void_p, # lpContext, +) + + +def _ctypes_array(c_type, py_array): + ar = (c_type * len(py_array))() + ar[:] = py_array + return ar def win_OpenSCManager(): - res = advapi32.OpenSCManagerA(None, None, SC_MANAGER_ALL_ACCESS) + res = advapi32.OpenSCManagerW(None, None, SC_MANAGER_ALL_ACCESS) if not res: raise Exception('Opening service manager failed - ' 'are you running this as administrator?') @@ -34,7 +66,7 @@ def win_OpenSCManager(): def win_install_service(service_name, cmdline): manager = win_OpenSCManager() try: - h = advapi32.CreateServiceA( + h = advapi32.CreateServiceW( manager, service_name, None, SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS, SERVICE_AUTO_START, SERVICE_ERROR_NORMAL, @@ -50,7 +82,7 @@ def win_install_service(service_name, cmdline): def win_uninstall_service(service_name): manager = win_OpenSCManager() try: - h = advapi32.OpenServiceA(manager, service_name, DELETE) + h = advapi32.OpenServiceW(manager, service_name, DELETE) if not h: raise OSError('Could not find service %s: %s' % ( service_name, ctypes.FormatError())) @@ -64,17 +96,90 @@ def win_uninstall_service(service_name): advapi32.CloseServiceHandle(manager) -def install_service(bind): - fn = os.path.normpath(__file__) - cmdline = '"%s" "%s" -s -b "%s"' % (sys.executable, fn, bind) - win_install_service('youtubedl_builder', cmdline) +def win_service_report_event(service_name, msg, is_error=True): + with open('C:/sshkeys/log', 'a', encoding='utf-8') as f: + f.write(msg + '\n') + + event_log = advapi32.RegisterEventSourceW(None, service_name) + if not event_log: + raise OSError('Could not report event: %s' % ctypes.FormatError()) + + try: + type_id = 0x0001 if is_error else 0x0004 + event_id = 0xc0000000 if is_error else 0x40000000 + lines = _ctypes_array(LPTSTR, [msg]) + + if not advapi32.ReportEventW( + event_log, type_id, 0, event_id, None, len(lines), 0, + lines, None): + raise OSError('Event reporting failed: %s' % ctypes.FormatError()) + finally: + advapi32.DeregisterEventSource(event_log) + + +def win_service_handler(stop_event, *args): + try: + raise ValueError('Handler called with args ' + repr(args)) + TODO + except Exception as e: + tb = traceback.format_exc() + msg = str(e) + '\n' + tb + win_service_report_event(service_name, msg, is_error=True) + raise + + +def win_service_set_status(handle, status_code): + svcStatus = SERVICE_STATUS() + svcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS + svcStatus.dwCurrentState = status_code + svcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP + + svcStatus.dwServiceSpecificExitCode = 0 + if not advapi32.SetServiceStatus(handle, ctypes.byref(svcStatus)): + raise OSError('SetServiceStatus failed: %r' % ctypes.FormatError()) -def uninstall_service(): - win_uninstall_service('youtubedl_builder') +def win_service_main(service_name, real_main, argc, argv_raw): + try: + #args = [argv_raw[i].value for i in range(argc)] + stop_event = threading.Event() + handler = HandlerEx(functools.partial(stop_event, win_service_handler)) + h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None) + if not h: + raise OSError('Handler registration failed: %s' % + ctypes.FormatError()) + + TODO + except Exception as e: + tb = traceback.format_exc() + msg = str(e) + '\n' + tb + win_service_report_event(service_name, msg, is_error=True) + raise -def main(argv): + +def win_service_start(service_name, real_main): + try: + cb = START_CALLBACK( + functools.partial(win_service_main, service_name, real_main)) + dispatch_table = _ctypes_array(SERVICE_TABLE_ENTRY, [ + SERVICE_TABLE_ENTRY( + service_name, + cb + ), + SERVICE_TABLE_ENTRY(None, ctypes.cast(None, START_CALLBACK)) + ]) + + if not advapi32.StartServiceCtrlDispatcherW(dispatch_table): + raise OSError('ctypes start failed: %s' % ctypes.FormatError()) + except Exception as e: + tb = traceback.format_exc() + msg = str(e) + '\n' + tb + win_service_report_event(service_name, msg, is_error=True) + raise + + +def main(args=None): parser = argparse.ArgumentParser() parser.add_argument('-i', '--install', action='store_const', dest='action', const='install', @@ -83,18 +188,26 @@ def main(argv): action='store_const', dest='action', const='uninstall', help='Remove Windows service') parser.add_argument('-s', '--service', - action='store_const', dest='action', const='servce', + action='store_const', dest='action', const='service', help='Run as a Windows service') parser.add_argument('-b', '--bind', metavar='', action='store', default='localhost:8142', help='Bind to host:port (default %default)') - options = parser.parse_args() + options = parser.parse_args(args=args) if options.action == 'install': - return install_service(options.bind) + fn = os.path.abspath(__file__).replace('v:', '\\\\vboxsrv\\vbox') + cmdline = '%s %s -s -b %s' % (sys.executable, fn, options.bind) + win_install_service(SVCNAME, cmdline) + return if options.action == 'uninstall': - return uninstall_service() + win_uninstall_service(SVCNAME) + return + + if options.action == 'service': + win_service_start(SVCNAME, main) + return host, port_str = options.bind.split(':') port = int(port_str) @@ -289,4 +402,4 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler): #============================================================================== if __name__ == '__main__': - main(sys.argv[1:]) + main() From 6c603ccce334ae244d73c0e82eb5c59e36c3d027 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 16 Sep 2013 04:12:43 +0200 Subject: [PATCH 013/103] [devscripts/release] temporary workarounds --- devscripts/release.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 62c68a6cf..796468b4b 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -55,8 +55,8 @@ git push origin "$version" /bin/echo -e "\n### OK, now it is time to build the binaries..." REV=$(git rev-parse HEAD) make youtube-dl youtube-dl.tar.gz -wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \ - wget "http://jeromelaheurte.net:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe +read -p "VM running? (y/n) " -n 1 +wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe mkdir -p "build/$version" mv youtube-dl youtube-dl.exe "build/$version" mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz" From f9e66fb99367b5ccac3f0c1c61441ed52d787836 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 16 Sep 2013 04:12:57 +0200 Subject: [PATCH 014/103] release 2013.09.16 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3b2505c77..e06b0cd6c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.09.12' +__version__ = '2013.09.16' From 7459e3a29081dfa4cbbcc795e054e884e1d5e020 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 16 Sep 2013 06:55:33 +0200 Subject: [PATCH 015/103] Always correct encoding when writing to sys.stderr (Fixes #1435) --- youtube_dl/YoutubeDL.py | 6 +----- youtube_dl/__init__.py | 20 ++++++++++---------- youtube_dl/utils.py | 12 ++++++++++++ 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e53a2b8ad..de2b133e0 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -142,14 +142,10 @@ class YoutubeDL(object): def to_screen(self, message, skip_eol=False): """Print message to stdout if not in quiet mode.""" - assert type(message) == type(u'') if not self.params.get('quiet', False): terminator = [u'\n', u''][skip_eol] output = message + terminator - if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr - output = output.encode(preferredencoding(), 'ignore') - self._screen_file.write(output) - self._screen_file.flush() + write_string(output, self._screen_file) def to_stderr(self, message): """Print message to stderr.""" diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 0022a4e7a..9efd7c3f7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -354,7 +354,7 @@ def parseOpts(overrideArguments=None): if overrideArguments is not None: opts, args = parser.parse_args(overrideArguments) if opts.verbose: - sys.stderr.write(u'[debug] Override config: ' + repr(overrideArguments) + '\n') + write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n') else: xdg_config_home = os.environ.get('XDG_CONFIG_HOME') if xdg_config_home: @@ -367,9 +367,9 @@ def parseOpts(overrideArguments=None): argv = systemConf + userConf + commandLineConf opts, args = parser.parse_args(argv) if opts.verbose: - sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') - sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') - sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') + write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') + write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') + write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') return parser, opts, args @@ -392,7 +392,7 @@ def _real_main(argv=None): except (IOError, OSError) as err: if opts.verbose: traceback.print_exc() - sys.stderr.write(u'ERROR: unable to open cookie file\n') + write_string(u'ERROR: unable to open cookie file\n') sys.exit(101) # Set user agent if opts.user_agent is not None: @@ -419,7 +419,7 @@ def _real_main(argv=None): batchurls = [x.strip() for x in batchurls] batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] if opts.verbose: - sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n') + write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n') except IOError: sys.exit(u'ERROR: batch file could not be read') all_urls = batchurls + args @@ -611,7 +611,7 @@ def _real_main(argv=None): }) if opts.verbose: - sys.stderr.write(u'[debug] youtube-dl version ' + __version__ + u'\n') + write_string(u'[debug] youtube-dl version ' + __version__ + u'\n') try: sp = subprocess.Popen( ['git', 'rev-parse', '--short', 'HEAD'], @@ -620,14 +620,14 @@ def _real_main(argv=None): out, err = sp.communicate() out = out.decode().strip() if re.match('[0-9a-f]+', out): - sys.stderr.write(u'[debug] Git HEAD: ' + out + u'\n') + write_string(u'[debug] Git HEAD: ' + out + u'\n') except: try: sys.exc_clear() except: pass - sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n') - sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n') + write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n') + write_string(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n') ydl.add_default_info_extractors() diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 5558d4737..814a9b6be 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -790,6 +790,18 @@ def platform_name(): return res +def write_string(s, out=None): + if out is None: + out = sys.stderr + assert type(s) == type(u'') + + if ('b' in getattr(out, 'mode', '') or + sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr + s = s.encode(preferredencoding(), 'ignore') + out.write(s) + out.flush() + + def bytes_to_intlist(bs): if not bs: return [] From 71c107fc5716dc769860ba6d3731184bde9a6902 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Mon, 16 Sep 2013 14:45:14 +0200 Subject: [PATCH 016/103] Add FKTV extractor Support for Fernsehkritik-TV (incl. Postecke) --- youtube_dl/extractor/fktv.py | 58 ++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 youtube_dl/extractor/fktv.py diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py new file mode 100644 index 000000000..239d9df38 --- /dev/null +++ b/youtube_dl/extractor/fktv.py @@ -0,0 +1,58 @@ +import re,random + +from .common import InfoExtractor +from ..utils import ( + determine_ext, +) + +class FKTVIE(InfoExtractor): + """Information Extractor for Fernsehkritik-TV""" + _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/folge-(?P[0-9]+)(?:/.*)?' + + def _real_extract(self,url): + mobj = re.match(self._VALID_URL, url) + episode = int(mobj.group('ep')) + + server = random.randint(2,4) + video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode + videos = [] + # Download all three parts + for i in range(1,4): + video_id = '%04d%d' % (episode, i) + video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i==1 else '-%d'%i) + video_title = 'Fernsehkritik %d.%d' % (episode, i) + videos.append({ + 'id': video_id, + 'url': video_url, + 'ext': determine_ext(video_url), + 'title': video_title, + 'thumbnail': video_thumbnail + }) + return videos + +class FKTVPosteckeIE(InfoExtractor): + """Information Extractor for Fernsehkritik-TV Postecke""" + _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/inline-video/postecke.php\?(.*&)?ep=(?P[0-9]+)(&|$)' + _TEST = { + u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120', + u'file': u'0120.flv', + u'md5': u'262f0adbac80317412f7e57b4808e5c4', + u'info_dict': { + u"title": u"Postecke 120" + } + } + + def _real_extract(self,url): + mobj = re.match(self._VALID_URL, url) + episode = int(mobj.group('ep')) + + server = random.randint(2,4) + video_id = '%04d' % episode + video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode) + video_title = 'Postecke %d' % episode + return[{ + 'id': video_id, + 'url': video_url, + 'ext': determine_ext(video_url), + 'title': video_title, + }] From 0761d02b0baf20955bd6e4f53568a3bbaa75ab5c Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Mon, 16 Sep 2013 14:46:19 +0200 Subject: [PATCH 017/103] Add FKTV extractor --- youtube_dl/extractor/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 06f9542d2..25a8e3cf5 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -28,6 +28,10 @@ from .eighttracks import EightTracksIE from .escapist import EscapistIE from .exfm import ExfmIE from .facebook import FacebookIE +from .fktv import ( + FKTVIE, + FKTVPosteckeIE, +) from .flickr import FlickrIE from .francetv import ( PluzzIE, From c4ece785647e58afb4f7b72f492eaf8e714bceba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 16 Sep 2013 19:34:10 +0200 Subject: [PATCH 018/103] [ooyala] add support for more type of video urls, like m3u8 manifests. --- youtube_dl/extractor/ooyala.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index b734722d0..01b3637c9 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -35,7 +35,9 @@ class OoyalaIE(InfoExtractor): mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', player, u'mobile player url') mobile_player = self._download_webpage(mobile_url, embedCode) - videos_info = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info').replace('\\"','"') + videos_info = self._search_regex( + r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', + mobile_player, u'info').replace('\\"','"') videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"') videos_info = json.loads(videos_info) videos_more_info =json.loads(videos_more_info) From 4b6462fc1e4306e4a1a5b3613b2cef5b09cc9abe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 16 Sep 2013 19:39:39 +0200 Subject: [PATCH 019/103] Add an extractor for Bloomberg (closes #1436) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/bloomberg.py | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 youtube_dl/extractor/bloomberg.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 246f1e8b5..7973a81d0 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -6,6 +6,7 @@ from .arte import ArteTvIE from .auengine import AUEngineIE from .bandcamp import BandcampIE from .bliptv import BlipTVIE, BlipTVUserIE +from .bloomberg import BloombergIE from .breakcom import BreakIE from .brightcove import BrightcoveIE from .c56 import C56IE diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py new file mode 100644 index 000000000..3666a780b --- /dev/null +++ b/youtube_dl/extractor/bloomberg.py @@ -0,0 +1,27 @@ +import re + +from .common import InfoExtractor + + +class BloombergIE(InfoExtractor): + _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P.+?).html' + + _TEST = { + u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', + u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4', + u'info_dict': { + u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies', + u'description': u'md5:abc86e5236f9f0e4866c59ad36736686', + }, + u'params': { + # Requires ffmpeg (m3u8 manifest) + u'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + webpage = self._download_webpage(url, name) + ooyala_url = self._og_search_video_url(webpage) + return self.url_result(ooyala_url, ie='Ooyala') From 4dc0ff3ecf2118a0bac128cb8e006e151222e23b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 16 Sep 2013 20:16:52 +0200 Subject: [PATCH 020/103] [ooyala] prefer ipad url It has better quality with m3u8 manifests --- youtube_dl/extractor/ooyala.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 01b3637c9..d189a9852 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -22,7 +22,7 @@ class OoyalaIE(InfoExtractor): return {'id': info['embedCode'], 'ext': 'mp4', 'title': unescapeHTML(info['title']), - 'url': info['url'], + 'url': info.get('ipad_url') or info['url'], 'description': unescapeHTML(more_info['description']), 'thumbnail': more_info['promo'], } From e8f8e800978c8845a706ebd3ab31bc1b98a51461 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 16 Sep 2013 20:58:36 +0200 Subject: [PATCH 021/103] Add an extractor for vice.com (closes #1051) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/ooyala.py | 4 ++++ youtube_dl/extractor/vice.py | 38 ++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+) create mode 100644 youtube_dl/extractor/vice.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7973a81d0..761575062 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -104,6 +104,7 @@ from .vbox7 import Vbox7IE from .veehd import VeeHDIE from .veoh import VeohIE from .vevo import VevoIE +from .vice import ViceIE from .videofyme import VideofyMeIE from .vimeo import VimeoIE, VimeoChannelIE from .vine import VineIE diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index d189a9852..1f7b4d2e7 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -18,6 +18,10 @@ class OoyalaIE(InfoExtractor): }, } + @staticmethod + def _url_for_embed_code(embed_code): + return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code + def _extract_result(self, info, more_info): return {'id': info['embedCode'], 'ext': 'mp4', diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py new file mode 100644 index 000000000..6b93afa50 --- /dev/null +++ b/youtube_dl/extractor/vice.py @@ -0,0 +1,38 @@ +import re + +from .common import InfoExtractor +from .ooyala import OoyalaIE +from ..utils import ExtractorError + + +class ViceIE(InfoExtractor): + _VALID_URL = r'http://www.vice.com/.*?/(?P.+)' + + _TEST = { + u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1', + u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4', + u'info_dict': { + u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', + }, + u'params': { + # Requires ffmpeg (m3u8 manifest) + u'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + webpage = self._download_webpage(url, name) + try: + ooyala_url = self._og_search_video_url(webpage) + except ExtractorError: + try: + embed_code = self._search_regex( + r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', webpage, + u'ooyala embed code') + ooyala_url = OoyalaIE._url_for_embed_code(embed_code) + except ExtractorError: + raise ExtractorError(u'The page doesn\'t contain a video', expected=True) + return self.url_result(ooyala_url, ie='Ooyala') + From 6ae8ee3f542485b3c790fc09e1136762b1b80c89 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 17 Sep 2013 16:59:12 +0200 Subject: [PATCH 022/103] Update 85 signature (Fixes #1449) This is the first signature algorithm to have been parsed automatically, although that only works for HTML5 players for now, and is not yet integrated into master. --- devscripts/youtube_genalgo.py | 4 ++-- youtube_dl/extractor/youtube.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index b390c7e2e..66019ee55 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -24,8 +24,8 @@ tests = [ ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", "yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"), # 85 - vflkuzxcs 2013/09/11 - ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", - "T>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOvUY.REWQ0987654321mnbqcxzasdfghjklpoiuytr"), + ('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[', + '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@'), # 84 - vflg0g8PQ 2013/08/29 (sporadic) ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"), diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e4a2e22bc..0c963fd20 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -429,7 +429,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): elif len(s) == 86: return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53] elif len(s) == 85: - return s[40] + s[82:43:-1] + s[22] + s[42:40:-1] + s[83] + s[39:22:-1] + s[0] + s[21:2:-1] + return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84] elif len(s) == 84: return s[81:36:-1] + s[0] + s[35:2:-1] elif len(s) == 83: From f3f34c5b0f51b4453033ef83981ff3284c050da8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 17 Sep 2013 17:00:20 +0200 Subject: [PATCH 023/103] release 2013.09.17 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e06b0cd6c..80ccfbd4f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.09.16' +__version__ = '2013.09.17' From 4a67aafb7e725c49e7bb3bcc5aea3fb3ae5fb42d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 17 Sep 2013 20:59:55 +0200 Subject: [PATCH 024/103] [youtube] Don't search the flash player version for videos with age gate activated --- youtube_dl/extractor/youtube.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0c963fd20..f227e2086 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -783,10 +783,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if self._downloader.params.get('verbose'): s = url_data['s'][0] if age_gate: - player_version = self._search_regex(r'ad3-(.+?)\.swf', - video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND', - 'flash player', fatal=False) - player = 'flash player %s' % player_version + player = 'flash player' else: player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage, 'html5 player', fatal=False) From 6523223a4c6f8924ac156b3fc2f5519a53b58e4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 17 Sep 2013 21:10:57 +0200 Subject: [PATCH 025/103] [hotnewhiphop] Fix test case title --- youtube_dl/extractor/hotnewhiphop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index ccca1d7e0..3798118a7 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -7,11 +7,11 @@ from .common import InfoExtractor class HotNewHipHopIE(InfoExtractor): _VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P.*)\.html' _TEST = { - u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html'", + u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html", u'file': u'1435540.mp3', u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96', u'info_dict': { - u"title": u"Freddie Gibbs Songs - Lay It Down" + u"title": u"Freddie Gibbs - Lay It Down" } } From 5d13df79a51235392bde81274c90e780041e12b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 17 Sep 2013 22:49:43 +0200 Subject: [PATCH 026/103] [francetv] Remove Pluzz test Videos expire in 7 days --- youtube_dl/extractor/francetv.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index f2b12c884..b8fe82e47 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -34,17 +34,7 @@ class PluzzIE(FranceTVBaseInfoExtractor): IE_NAME = u'pluzz.francetv.fr' _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html' - _TEST = { - u'url': u'http://pluzz.francetv.fr/videos/allo_rufo_saison5_,88439064.html', - u'file': u'88439064.mp4', - u'info_dict': { - u'title': u'Allô Rufo', - u'description': u'md5:d909f1ebdf963814b65772aea250400e', - }, - u'params': { - u'skip_download': True, - }, - } + # Can't use tests, videos expire in 7 days def _real_extract(self, url): title = re.match(self._VALID_URL, url).group(1) From 1237c9a3a5ef0abca961f7f2252fde7f9e99db66 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Tue, 17 Sep 2013 06:24:20 +0200 Subject: [PATCH 027/103] XHamsterIE: Fix support for new HD video url format and add test (closes PR #1443) --- youtube_dl/extractor/xhamster.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index fa759d30c..361619694 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -11,8 +11,8 @@ from ..utils import ( class XHamsterIE(InfoExtractor): """Information Extractor for xHamster""" - _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P[0-9]+)/.*\.html' - _TEST = { + _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P[0-9]+)/(?P.+?)\.html(?:\?.*)?' + _TESTS = [{ u'url': u'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', u'file': u'1509445.flv', u'md5': u'9f48e0e8d58e3076bb236ff412ab62fa', @@ -21,13 +21,24 @@ class XHamsterIE(InfoExtractor): u"uploader_id": u"Ruseful2011", u"title": u"FemaleAgent Shy beauty takes the bait" } - } + }, + { + u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', + u'file': u'2221348.flv', + u'md5': u'e767b9475de189320f691f49c679c4c7', + u'info_dict': { + u"upload_date": u"20130914", + u"uploader_id": u"jojo747400", + u"title": u"Britney Spears Sexy Booty" + } + }] def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - mrss_url = 'http://xhamster.com/movies/%s/.html?hd' % video_id + seo = mobj.group('seo') + mrss_url = 'http://xhamster.com/movies/%s/%s.html?hd' % (video_id, seo) webpage = self._download_webpage(mrss_url, video_id) mobj = re.search(r'\'srv\': \'(?P[^\']*)\',\s*\'file\': \'(?P[^\']+)\',', webpage) From 830dd1944a3db8de373fe78ac805302915caf126 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 18 Sep 2013 13:23:04 +0200 Subject: [PATCH 028/103] Clarify -i help (#1453) --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 9efd7c3f7..e8299130c 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -149,7 +149,7 @@ def parseOpts(overrideArguments=None): general.add_option('-U', '--update', action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') general.add_option('-i', '--ignore-errors', - action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) + action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False) general.add_option('--dump-user-agent', action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False) From eb03f4dad3ebb0f781e6742b6c1c590506d58e5b Mon Sep 17 00:00:00 2001 From: Ruirize Date: Wed, 18 Sep 2013 15:54:45 +0100 Subject: [PATCH 029/103] Added Newgrounds support --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/newgrounds.py | 37 ++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 youtube_dl/extractor/newgrounds.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 761575062..e1ec38cf2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -68,6 +68,7 @@ from .myvideo import MyVideoIE from .naver import NaverIE from .nba import NBAIE from .nbc import NBCNewsIE +from .newgrounds import NewgroundsIE from .ooyala import OoyalaIE from .orf import ORFIE from .pbs import PBSIE diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py new file mode 100644 index 000000000..d19145a72 --- /dev/null +++ b/youtube_dl/extractor/newgrounds.py @@ -0,0 +1,37 @@ +import json +import re + +from .common import InfoExtractor +from ..utils import determine_ext + +class NewgroundsIE(InfoExtractor): + _VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P\d+)' + _TEST = { + u'url': u'http://www.newgrounds.com/audio/listen/549479', + u'file': u'549479_B7---BusMode.mp3', + u'md5': u'2924d938f60415cd7afbe7ae9042a99e', + u'info_dict': { + u"title": u"B7 - BusMode", + u"uploader" : u"Burn7", + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + music_id = mobj.group('id') + webpage = self._download_webpage(url, music_id) + + title = self._html_search_regex(r',"name":"([^"]+)",', webpage, 'music title', flags=re.DOTALL) + uploader = self._html_search_regex(r',"artist":"([^"]+)",', webpage, 'music uploader', flags=re.DOTALL) + + music_url_json_string = '{"url":"' + self._html_search_regex(r'{"url":"([^"]+)",', webpage, 'music url', flags=re.DOTALL) + '"}' + music_url_json = json.loads(music_url_json_string) + music_url = music_url_json['url'] + + return [{ + 'id': music_id, + 'title': title, + 'url': music_url, + 'uploader': uploader, + 'ext': determine_ext(music_url), + }] From 1ef80b55ddf05d7fe2bcba08c414aa10c524870d Mon Sep 17 00:00:00 2001 From: Ruirize Date: Wed, 18 Sep 2013 16:23:38 +0100 Subject: [PATCH 030/103] Fixes test fail Was unaware of --id being passed to test. --- youtube_dl/extractor/newgrounds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py index d19145a72..f316b9272 100644 --- a/youtube_dl/extractor/newgrounds.py +++ b/youtube_dl/extractor/newgrounds.py @@ -8,7 +8,7 @@ class NewgroundsIE(InfoExtractor): _VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P\d+)' _TEST = { u'url': u'http://www.newgrounds.com/audio/listen/549479', - u'file': u'549479_B7---BusMode.mp3', + u'file': u'549479.mp3', u'md5': u'2924d938f60415cd7afbe7ae9042a99e', u'info_dict': { u"title": u"B7 - BusMode", From a19413c311e1bd2ffef2705212a8719b7126eef9 Mon Sep 17 00:00:00 2001 From: Ruirize Date: Wed, 18 Sep 2013 17:17:12 +0100 Subject: [PATCH 031/103] Changed file hash. --- youtube_dl/extractor/newgrounds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py index f316b9272..e66294ade 100644 --- a/youtube_dl/extractor/newgrounds.py +++ b/youtube_dl/extractor/newgrounds.py @@ -9,7 +9,7 @@ class NewgroundsIE(InfoExtractor): _TEST = { u'url': u'http://www.newgrounds.com/audio/listen/549479', u'file': u'549479.mp3', - u'md5': u'2924d938f60415cd7afbe7ae9042a99e', + u'md5': u'fe6033d297591288fa1c1f780386f07a', u'info_dict': { u"title": u"B7 - BusMode", u"uploader" : u"Burn7", From d0ae9e3a8d807d0466bccc27186c8c2d86215350 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 18 Sep 2013 22:14:43 +0200 Subject: [PATCH 032/103] [newgrounds] simplify --- youtube_dl/extractor/newgrounds.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py index e66294ade..2ef80bce0 100644 --- a/youtube_dl/extractor/newgrounds.py +++ b/youtube_dl/extractor/newgrounds.py @@ -4,6 +4,7 @@ import re from .common import InfoExtractor from ..utils import determine_ext + class NewgroundsIE(InfoExtractor): _VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P\d+)' _TEST = { @@ -12,7 +13,7 @@ class NewgroundsIE(InfoExtractor): u'md5': u'fe6033d297591288fa1c1f780386f07a', u'info_dict': { u"title": u"B7 - BusMode", - u"uploader" : u"Burn7", + u"uploader": u"Burn7", } } @@ -21,17 +22,17 @@ class NewgroundsIE(InfoExtractor): music_id = mobj.group('id') webpage = self._download_webpage(url, music_id) - title = self._html_search_regex(r',"name":"([^"]+)",', webpage, 'music title', flags=re.DOTALL) - uploader = self._html_search_regex(r',"artist":"([^"]+)",', webpage, 'music uploader', flags=re.DOTALL) + title = self._html_search_regex(r',"name":"([^"]+)",', webpage, u'music title') + uploader = self._html_search_regex(r',"artist":"([^"]+)",', webpage, u'music uploader') - music_url_json_string = '{"url":"' + self._html_search_regex(r'{"url":"([^"]+)",', webpage, 'music url', flags=re.DOTALL) + '"}' + music_url_json_string = self._html_search_regex(r'({"url":"[^"]+"),', webpage, u'music url') + '}' music_url_json = json.loads(music_url_json_string) music_url = music_url_json['url'] - return [{ + return { 'id': music_id, - 'title': title, + 'title': title, 'url': music_url, 'uploader': uploader, 'ext': determine_ext(music_url), - }] + } From 2dad310e2cab1913ed1a8d1072b57b46e7257b1e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 18 Sep 2013 22:30:22 +0200 Subject: [PATCH 033/103] Credit @Ruirize for newgrounds --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index e8299130c..df4feefe7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -30,6 +30,7 @@ __authors__ = ( 'Pierre Rudloff', 'Huarong Huo', 'Ismael Mejía', + 'Steffan \'Ruirize\' James', ) __license__ = 'Public Domain' From 71c82637e7add9b437bc6dbe03035d6d8aae82e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 18 Sep 2013 23:00:32 +0200 Subject: [PATCH 034/103] [youtube] apply the fix for lists with number of videos multiple of _MAX_RESULTS to user extraction Copied from the playlist extractor. --- youtube_dl/extractor/youtube.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f227e2086..23a8097c5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1005,6 +1005,9 @@ class YoutubeUserIE(InfoExtractor): response = json.loads(page) except ValueError as err: raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err)) + if 'entry' not in response['feed']: + # Number of videos is a multiple of self._MAX_RESULTS + break # Extract video identifiers ids_in_page = [] From c5e743f66f5637fe02fe0b5167fab99a06b903e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 18 Sep 2013 23:32:37 +0200 Subject: [PATCH 035/103] [fktv] support videos splitted in any number of parts and some style changes --- youtube_dl/extractor/fktv.py | 57 ++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py index 239d9df38..9c89362ef 100644 --- a/youtube_dl/extractor/fktv.py +++ b/youtube_dl/extractor/fktv.py @@ -1,37 +1,58 @@ -import re,random +import re +import random +import json from .common import InfoExtractor from ..utils import ( determine_ext, + get_element_by_id, + clean_html, ) + class FKTVIE(InfoExtractor): - """Information Extractor for Fernsehkritik-TV""" + IE_NAME = u'fernsehkritik.tv' _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/folge-(?P[0-9]+)(?:/.*)?' - def _real_extract(self,url): + _TEST = { + u'url': u'http://fernsehkritik.tv/folge-1', + u'file': u'00011.flv', + u'info_dict': { + u'title': u'Folge 1 vom 10. April 2007', + u'description': u'md5:fb4818139c7cfe6907d4b83412a6864f', + }, + } + + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) episode = int(mobj.group('ep')) - - server = random.randint(2,4) + + server = random.randint(2, 4) video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode + start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode, + episode) + playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage, + u'playlist', flags=re.DOTALL) + files = json.loads(re.sub('{[^{}]*?}', '{}', playlist)) + # TODO: return a single multipart video videos = [] - # Download all three parts - for i in range(1,4): + for i, _ in enumerate(files, 1): video_id = '%04d%d' % (episode, i) - video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i==1 else '-%d'%i) + video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i) video_title = 'Fernsehkritik %d.%d' % (episode, i) videos.append({ - 'id': video_id, - 'url': video_url, - 'ext': determine_ext(video_url), - 'title': video_title, + 'id': video_id, + 'url': video_url, + 'ext': determine_ext(video_url), + 'title': clean_html(get_element_by_id('eptitle', start_webpage)), + 'description': clean_html(get_element_by_id('contentlist', start_webpage)), 'thumbnail': video_thumbnail }) return videos + class FKTVPosteckeIE(InfoExtractor): - """Information Extractor for Fernsehkritik-TV Postecke""" + IE_NAME = u'fernsehkritik.tv:postecke' _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/inline-video/postecke.php\?(.*&)?ep=(?P[0-9]+)(&|$)' _TEST = { u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120', @@ -42,17 +63,17 @@ class FKTVPosteckeIE(InfoExtractor): } } - def _real_extract(self,url): + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) episode = int(mobj.group('ep')) - - server = random.randint(2,4) + + server = random.randint(2, 4) video_id = '%04d' % episode video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode) video_title = 'Postecke %d' % episode - return[{ + return { 'id': video_id, 'url': video_url, 'ext': determine_ext(video_url), 'title': video_title, - }] + } From bc4b9008981096184739666941e73c8d09623502 Mon Sep 17 00:00:00 2001 From: patrickslin Date: Thu, 19 Sep 2013 21:49:06 -0700 Subject: [PATCH 036/103] Unable to decrypt signature length 93 (fixes #1461) --- youtube_dl/extractor/youtube.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 23a8097c5..e5f536e6f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -416,7 +416,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _decrypt_signature(self, s): """Turn the encrypted s field into a working signature""" - if len(s) == 92: + if len(s) == 93: + return s[86:29:-1] + s[88] + s[28:5:-1] + elif len(s) == 92: return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] elif len(s) == 90: return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] From 63037593c0cc3d5da4065368736d74fd594cb1fc Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 20 Sep 2013 10:24:48 +0200 Subject: [PATCH 037/103] release 2013.09.20 --- README.md | 3 ++- youtube_dl/version.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 400e6cd48..f54945acc 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,8 @@ which means you can modify it, redistribute it or use it however you like. -U, --update update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed) - -i, --ignore-errors continue on download errors + -i, --ignore-errors continue on download errors, for example to to + skip unavailable videos in a playlist --dump-user-agent display the current browser identification --user-agent UA specify a custom user agent --referer REF specify a custom referer, use if the video access diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 80ccfbd4f..a79664521 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.09.17' +__version__ = '2013.09.20' From 1a810f0d4e63ba702e49b7404c3f5f74ef716759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 20 Sep 2013 13:05:34 +0200 Subject: [PATCH 038/103] [funnyordie] Fix video url extraction --- youtube_dl/extractor/funnyordie.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index f3d86a711..2ccdb7073 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -21,7 +21,8 @@ class FunnyOrDieIE(InfoExtractor): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - video_url = self._search_regex(r'type="video/mp4" src="(.*?)"', + video_url = self._search_regex( + [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''], webpage, u'video URL', flags=re.DOTALL) info = { From c40c6aaaaa80db619459be3bd7f93853da70be0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 20 Sep 2013 13:26:03 +0200 Subject: [PATCH 039/103] Catch socket.error before IOError Since python 2.6 it's a child class. --- youtube_dl/YoutubeDL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index de2b133e0..d999099fe 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -544,11 +544,11 @@ class YoutubeDL(object): else: try: success = self.fd._do_download(filename, info_dict) - except (OSError, IOError) as err: - raise UnavailableVideoError(err) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self.report_error(u'unable to download video data: %s' % str(err)) return + except (OSError, IOError) as err: + raise UnavailableVideoError(err) except (ContentTooShortError, ) as err: self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) return From 38d025b3f0f6f349c36a4531f3b36d7e7553f417 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 20 Sep 2013 14:43:16 +0200 Subject: [PATCH 040/103] [youtube] add algo for length 91 --- devscripts/youtube_genalgo.py | 3 +++ youtube_dl/extractor/youtube.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index d4546758d..f91e8855d 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -12,6 +12,9 @@ tests = [ # 92 - vflQw-fB4 2013/07/17 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"", "mrtyuioplkjhgfdsazxcvbnq1234567890QWERTY}IOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]\"|:;"), + # 91 - vfl79wBKW 2013/07/20 (sporadic) + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~", + "/?;:|}][{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543.1mnbvcxzasdfghjklpoiu"), # 90 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`", "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"), diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e5f536e6f..47d5cb7ff 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -420,6 +420,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): return s[86:29:-1] + s[88] + s[28:5:-1] elif len(s) == 92: return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] + elif len(s) == 91: + return s[84:27:-1] + s[86] + s[26:5:-1] elif len(s) == 90: return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] elif len(s) == 89: From 3d60bb96e138ce8221f35b7f9d1e1b28f235083e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 20 Sep 2013 16:55:50 +0200 Subject: [PATCH 041/103] Add an extractor for ebaumsworld.com (closes #1462) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/ebaumsworld.py | 37 +++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 youtube_dl/extractor/ebaumsworld.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 726c9fa15..c6a55f194 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -24,6 +24,7 @@ from .depositfiles import DepositFilesIE from .dotsub import DotsubIE from .dreisat import DreiSatIE from .defense import DefenseGouvFrIE +from .ebaumsworld import EbaumsWorldIE from .ehow import EHowIE from .eighttracks import EightTracksIE from .escapist import EscapistIE diff --git a/youtube_dl/extractor/ebaumsworld.py b/youtube_dl/extractor/ebaumsworld.py new file mode 100644 index 000000000..f02c6998b --- /dev/null +++ b/youtube_dl/extractor/ebaumsworld.py @@ -0,0 +1,37 @@ +import re +import xml.etree.ElementTree + +from .common import InfoExtractor +from ..utils import determine_ext + + +class EbaumsWorldIE(InfoExtractor): + _VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P\d+)' + + _TEST = { + u'url': u'http://www.ebaumsworld.com/video/watch/83367677/', + u'file': u'83367677.mp4', + u'info_dict': { + u'title': u'A Giant Python Opens The Door', + u'description': u'This is how nightmares start...', + u'uploader': u'jihadpizza', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + config_xml = self._download_webpage( + 'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id) + config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8')) + video_url = config.find('file').text + + return { + 'id': video_id, + 'title': config.find('title').text, + 'url': video_url, + 'ext': determine_ext(video_url), + 'description': config.find('description').text, + 'thumbnail': config.find('image').text, + 'uploader': config.find('username').text, + } From 58f289d013fb3d225488b43deb8216eee9154857 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 20 Sep 2013 22:59:14 +0200 Subject: [PATCH 042/103] release 2013.09.20.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a79664521..88d70b47a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.09.20' +__version__ = '2013.09.20.1' From 0fd49457f5257dbe317c69314ee57a6c485d41a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 21 Sep 2013 10:51:25 +0200 Subject: [PATCH 043/103] [southparkstudios] Fix mgid extraction --- youtube_dl/extractor/southparkstudios.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py index a5dc754dd..25f799a27 100644 --- a/youtube_dl/extractor/southparkstudios.py +++ b/youtube_dl/extractor/southparkstudios.py @@ -14,7 +14,7 @@ class SouthParkStudiosIE(MTVIE): u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4', u'info_dict': { u'title': u'Bat Daded', - u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.', + u'description': u'Randy finally gets the chance to fight Bat Dad and gets the boys disqualified from the season championships.', }, } @@ -29,6 +29,6 @@ class SouthParkStudiosIE(MTVIE): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"', + mgid = self._search_regex(r'data-mgid="(mgid:.*?)"', webpage, u'mgid') return self._get_videos_info(mgid) From 69b227a9bc75a75e9156f05d08c3c69337be64ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 21 Sep 2013 10:58:43 +0200 Subject: [PATCH 044/103] [southparkstudios] add support for http://www.southparkstudios.com/full-episodes/* urls (closes #1469) --- youtube_dl/extractor/southparkstudios.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py index 25f799a27..1a611d3bb 100644 --- a/youtube_dl/extractor/southparkstudios.py +++ b/youtube_dl/extractor/southparkstudios.py @@ -5,7 +5,7 @@ from .mtv import MTVIE, _media_xml_tag class SouthParkStudiosIE(MTVIE): IE_NAME = u'southparkstudios.com' - _VALID_URL = r'https?://www\.southparkstudios\.com/clips/(?P\d+)' + _VALID_URL = r'https?://www\.southparkstudios\.com/(clips|full-episodes)/(?P.+?)(\?|#|$)' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' @@ -23,7 +23,11 @@ class SouthParkStudiosIE(MTVIE): def _get_thumbnail_url(self, uri, itemdoc): search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) - return itemdoc.find(search_path).attrib['url'] + thumb_node = itemdoc.find(search_path) + if thumb_node is None: + return None + else: + return thumb_node.attrib['url'] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From b61067fa4f6c3bd69452b2530ccdf277e0e23e8b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 21 Sep 2013 11:10:22 +0200 Subject: [PATCH 045/103] Abort if extractaudio is given without a variable extension (#1470) --- youtube_dl/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index df4feefe7..1ed30aae3 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -551,6 +551,10 @@ def _real_main(argv=None): or (opts.useid and u'%(id)s.%(ext)s') or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') or u'%(title)s-%(id)s.%(ext)s') + if '%(ext)s' not in outtmpl and opts.extractaudio: + parser.error(u'Cannot download a video and extract audio into the same' + u' file! Use "%%(ext)s" instead of %r' % + determine_ext(outtmpl, u'')) # YoutubeDL ydl = YoutubeDL({ From 34308b30d6c2b05819e362deab94ce590c325e67 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 21 Sep 2013 11:48:07 +0200 Subject: [PATCH 046/103] Warn if no locale is set (#1474) --- youtube_dl/YoutubeDL.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d999099fe..fa24ebe0d 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -104,6 +104,17 @@ class YoutubeDL(object): self._download_retcode = 0 self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] + + if (sys.version_info >= (3,) and sys.platform != 'win32' and + sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] + and not params['restrictfilenames']): + # On Python 3, the Unicode filesystem API will throw errors (#1474) + self.report_warning( + u'Assuming --restrict-filenames isnce file system encoding ' + u'cannot encode all charactes. ' + u'Set the LC_ALL environment variable to fix this.') + params['restrictfilenames'] = True + self.params = params self.fd = FileDownloader(self, self.params) From 3a1d48d6de0159807ff57b2cec6766cbfd400f00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 21 Sep 2013 12:15:54 +0200 Subject: [PATCH 047/103] [dailymotion] Raise ExtractorError if the dailymotion response reports an error --- youtube_dl/extractor/dailymotion.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 360113f9c..ce7057a26 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -63,6 +63,9 @@ class DailymotionIE(SubtitlesInfoExtractor): info = self._search_regex(r'var info = ({.*?}),$', embed_page, 'video info', flags=re.MULTILINE) info = json.loads(info) + if info.get('error') is not None: + msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title'] + raise ExtractorError(msg, expected=True) # TODO: support choosing qualities From 39baacc49f323adc639d502d38a016ebd63acd75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 21 Sep 2013 12:45:53 +0200 Subject: [PATCH 048/103] [dailymotion] Add an extractor for users (closes #1476) --- test/test_playlists.py | 16 ++++++++++- youtube_dl/extractor/__init__.py | 6 +++- youtube_dl/extractor/dailymotion.py | 44 +++++++++++++++++++++++------ 3 files changed, 55 insertions(+), 11 deletions(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index d079a4f23..e22054d69 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# encoding: utf-8 import sys import unittest @@ -8,7 +9,13 @@ import json import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE, SoundcloudUserIE +from youtube_dl.extractor import ( + DailymotionPlaylistIE, + DailymotionUserIE, + VimeoChannelIE, + UstreamChannelIE, + SoundcloudUserIE, +) from youtube_dl.utils import * from helper import FakeYDL @@ -25,6 +32,13 @@ class TestPlaylists(unittest.TestCase): self.assertIsPlaylist(result) self.assertEqual(result['title'], u'SPORT') self.assertTrue(len(result['entries']) > 20) + def test_dailymotion_user(self): + dl = FakeYDL() + ie = DailymotionUserIE(dl) + result = ie.extract('http://www.dailymotion.com/user/generation-quoi/') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], u'Génération Quoi') + self.assertTrue(len(result['entries']) >= 26) def test_vimeo_channel(self): dl = FakeYDL() diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c6a55f194..949f59a44 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -18,7 +18,11 @@ from .comedycentral import ComedyCentralIE from .condenast import CondeNastIE from .criterion import CriterionIE from .cspan import CSpanIE -from .dailymotion import DailymotionIE, DailymotionPlaylistIE +from .dailymotion import ( + DailymotionIE, + DailymotionPlaylistIE, + DailymotionUserIE, +) from .daum import DaumIE from .depositfiles import DepositFilesIE from .dotsub import DotsubIE diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index ce7057a26..64b89aae8 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -114,28 +114,54 @@ class DailymotionIE(SubtitlesInfoExtractor): class DailymotionPlaylistIE(InfoExtractor): + IE_NAME = u'dailymotion:playlist' _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P.+?)/' _MORE_PAGES_INDICATOR = r'' + _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s' - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('id') + def _extract_entries(self, id): video_ids = [] - for pagenum in itertools.count(1): - webpage = self._download_webpage('https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum), - playlist_id, u'Downloading page %s' % pagenum) + webpage = self._download_webpage(self._PAGE_TEMPLATE % (id, pagenum), + id, u'Downloading page %s' % pagenum) playlist_el = get_element_by_attribute(u'class', u'video_list', webpage) video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el)) if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: break - - entries = [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') + return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') for video_id in video_ids] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + webpage = self._download_webpage(url, playlist_id) + return {'_type': 'playlist', 'id': playlist_id, 'title': get_element_by_id(u'playlist_name', webpage), - 'entries': entries, + 'entries': self._extract_entries(playlist_id), } + + +class DailymotionUserIE(DailymotionPlaylistIE): + IE_NAME = u'dailymotion:user' + _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P[^/]+)' + _MORE_PAGES_INDICATOR = r'' + _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + user = mobj.group('user') + webpage = self._download_webpage(url, user) + full_user = self._html_search_regex( + r'(.*?) Date: Sat, 21 Sep 2013 13:50:52 +0200 Subject: [PATCH 049/103] [livestream] Fix events extraction (fixes #1467) --- test/test_playlists.py | 10 ++++++++++ youtube_dl/extractor/livestream.py | 14 +++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index e22054d69..c33511333 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -15,6 +15,7 @@ from youtube_dl.extractor import ( VimeoChannelIE, UstreamChannelIE, SoundcloudUserIE, + LivestreamIE, ) from youtube_dl.utils import * @@ -32,6 +33,7 @@ class TestPlaylists(unittest.TestCase): self.assertIsPlaylist(result) self.assertEqual(result['title'], u'SPORT') self.assertTrue(len(result['entries']) > 20) + def test_dailymotion_user(self): dl = FakeYDL() ie = DailymotionUserIE(dl) @@ -64,5 +66,13 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['id'], u'9615865') self.assertTrue(len(result['entries']) >= 12) + def test_livestream_event(self): + dl = FakeYDL() + ie = LivestreamIE(dl) + result = ie.extract('http://new.livestream.com/tedx/cityenglish') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], u'TEDCity2.0 (English)') + self.assertTrue(len(result['entries']) >= 4) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 309921078..d04da98c8 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -2,7 +2,12 @@ import re import json from .common import InfoExtractor -from ..utils import compat_urllib_parse_urlparse, compat_urlparse +from ..utils import ( + compat_urllib_parse_urlparse, + compat_urlparse, + get_meta_content, + ExtractorError, +) class LivestreamIE(InfoExtractor): @@ -35,8 +40,11 @@ class LivestreamIE(InfoExtractor): if video_id is None: # This is an event page: - api_url = self._search_regex(r'event_design_eventId: \'(.+?)\'', - webpage, 'api url') + player = get_meta_content('twitter:player', webpage) + if player is None: + raise ExtractorError('Couldn\'t extract event api url') + api_url = player.replace('/player', '') + api_url = re.sub(r'^(https?://)(new\.)', r'\1api.\2', api_url) info = json.loads(self._download_webpage(api_url, event_name, u'Downloading event info')) videos = [self._extract_video_info(video_data['data']) From e0df6211cc9364f62406b2907fa830847324db53 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 21 Sep 2013 14:19:30 +0200 Subject: [PATCH 050/103] Restore accidentally deleted commits That's what happens if you let Windows machines write :( --- .gitignore | 1 + test/test_youtube_signature.py | 80 +++++ youtube_dl/extractor/youtube.py | 603 +++++++++++++++++++++++++++++++- youtube_dl/utils.py | 6 + 4 files changed, 673 insertions(+), 17 deletions(-) create mode 100644 test/test_youtube_signature.py diff --git a/.gitignore b/.gitignore index 61cb6bc3c..24fdb3626 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,4 @@ updates_key.pem *.flv *.mp4 *.part +test/testdata diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py new file mode 100644 index 000000000..2c06caef4 --- /dev/null +++ b/test/test_youtube_signature.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python + +import io +import re +import string +import sys +import unittest + +# Allow direct execution +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.extractor import YoutubeIE +from youtube_dl.utils import compat_str, compat_urlretrieve + +_TESTS = [ + ( + u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js', + u'js', + 86, + u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321', + ), + ( + u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js', + u'js', + 85, + u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@', + ), + ( + u'https://s.ytimg.com/yts/swfbin/watch_as3-vflg5GhxU.swf', + u'swf', + 82, + u'23456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!?#$%&\'()*+,-./:;<=>"' + ), +] + + +class TestSignature(unittest.TestCase): + def setUp(self): + TEST_DIR = os.path.dirname(os.path.abspath(__file__)) + self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata') + if not os.path.exists(self.TESTDATA_DIR): + os.mkdir(self.TESTDATA_DIR) + + +def make_testfunc(url, stype, sig_length, expected_sig): + basename = url.rpartition('/')[2] + m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename) + assert m, '%r should follow URL format' % basename + test_id = m.group(1) + + def test_func(self): + fn = os.path.join(self.TESTDATA_DIR, basename) + + if not os.path.exists(fn): + compat_urlretrieve(url, fn) + + ie = YoutubeIE() + if stype == 'js': + with io.open(fn, encoding='utf-8') as testf: + jscode = testf.read() + func = ie._parse_sig_js(jscode) + else: + assert stype == 'swf' + with open(fn, 'rb') as testf: + swfcode = testf.read() + func = ie._parse_sig_swf(swfcode) + src_sig = compat_str(string.printable[:sig_length]) + got_sig = func(src_sig) + self.assertEqual(got_sig, expected_sig) + + test_func.__name__ = str('test_signature_' + stype + '_' + test_id) + setattr(TestSignature, test_func.__name__, test_func) + +for test_spec in _TESTS: + make_testfunc(*test_spec) + + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 47d5cb7ff..456d3cb0f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1,11 +1,16 @@ # coding: utf-8 +import collections +import itertools +import io import json import netrc import re import socket -import itertools -import xml.etree.ElementTree +import string +import struct +import traceback +import zlib from .common import InfoExtractor, SearchInfoExtractor from .subtitles import SubtitlesInfoExtractor @@ -393,6 +398,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if YoutubePlaylistIE.suitable(url): return False return re.match(cls._VALID_URL, url, re.VERBOSE) is not None + def __init__(self, *args, **kwargs): + super(YoutubeIE, self).__init__(*args, **kwargs) + self._jsplayer_cache = {} + def report_video_webpage_download(self, video_id): """Report attempt to download video webpage.""" self.to_screen(u'%s: Downloading video webpage' % video_id) @@ -413,15 +422,565 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): """Indicate the download will use the RTMP protocol.""" self.to_screen(u'RTMP download detected') - def _decrypt_signature(self, s): + def _extract_signature_function(self, video_id, player_url): + id_m = re.match(r'.*-(?P[^.]+)\.(?P[^.]+)$', player_url) + player_type = id_m.group('ext') + player_id = id_m.group('id') + + if player_type == 'js': + code = self._download_webpage( + player_url, video_id, + note=u'Downloading %s player %s' % (player_type, jsplayer_id), + errnote=u'Download of %s failed' % player_url) + return self._parse_sig_js(code) + elif player_tpye == 'swf': + urlh = self._request_webpage( + player_url, video_id, + note=u'Downloading %s player %s' % (player_type, jsplayer_id), + errnote=u'Download of %s failed' % player_url) + code = urlh.read() + return self._parse_sig_swf(code) + else: + assert False, 'Invalid player type %r' % player_type + + def _parse_sig_js(self, jscode): + funcname = self._search_regex( + r'signature=([a-zA-Z]+)', jscode, + u'Initial JS player signature function name') + + functions = {} + + def argidx(varname): + return string.lowercase.index(varname) + + def interpret_statement(stmt, local_vars, allow_recursion=20): + if allow_recursion < 0: + raise ExctractorError(u'Recursion limit reached') + + if stmt.startswith(u'var '): + stmt = stmt[len(u'var '):] + ass_m = re.match(r'^(?P[a-z]+)(?:\[(?P[^\]]+)\])?' + + r'=(?P.*)$', stmt) + if ass_m: + if ass_m.groupdict().get('index'): + def assign(val): + lvar = local_vars[ass_m.group('out')] + idx = interpret_expression(ass_m.group('index'), + local_vars, allow_recursion) + assert isinstance(idx, int) + lvar[idx] = val + return val + expr = ass_m.group('expr') + else: + def assign(val): + local_vars[ass_m.group('out')] = val + return val + expr = ass_m.group('expr') + elif stmt.startswith(u'return '): + assign = lambda v: v + expr = stmt[len(u'return '):] + else: + raise ExtractorError( + u'Cannot determine left side of statement in %r' % stmt) + + v = interpret_expression(expr, local_vars, allow_recursion) + return assign(v) + + def interpret_expression(expr, local_vars, allow_recursion): + if expr.isdigit(): + return int(expr) + + if expr.isalpha(): + return local_vars[expr] + + m = re.match(r'^(?P[a-z]+)\.(?P.*)$', expr) + if m: + member = m.group('member') + val = local_vars[m.group('in')] + if member == 'split("")': + return list(val) + if member == 'join("")': + return u''.join(val) + if member == 'length': + return len(val) + if member == 'reverse()': + return val[::-1] + slice_m = re.match(r'slice\((?P.*)\)', member) + if slice_m: + idx = interpret_expression( + slice_m.group('idx'), local_vars, allow_recursion-1) + return val[idx:] + + m = re.match( + r'^(?P[a-z]+)\[(?P.+)\]$', expr) + if m: + val = local_vars[m.group('in')] + idx = interpret_expression(m.group('idx'), local_vars, + allow_recursion-1) + return val[idx] + + m = re.match(r'^(?P.+?)(?P[%])(?P.+?)$', expr) + if m: + a = interpret_expression(m.group('a'), + local_vars, allow_recursion) + b = interpret_expression(m.group('b'), + local_vars, allow_recursion) + return a % b + + m = re.match( + r'^(?P[a-zA-Z]+)\((?P[a-z0-9,]+)\)$', expr) + if m: + fname = m.group('func') + if fname not in functions: + functions[fname] = extract_function(fname) + argvals = [int(v) if v.isdigit() else local_vars[v] + for v in m.group('args').split(',')] + return functions[fname](argvals) + raise ExtractorError(u'Unsupported JS expression %r' % expr) + + def extract_function(funcname): + func_m = re.search( + r'function ' + re.escape(funcname) + + r'\((?P[a-z,]+)\){(?P[^}]+)}', + jscode) + argnames = func_m.group('args').split(',') + + def resf(args): + local_vars = dict(zip(argnames, args)) + for stmt in func_m.group('code').split(';'): + res = interpret_statement(stmt, local_vars) + return res + return resf + + initial_function = extract_function(funcname) + return lambda s: initial_function([s]) + + def _parse_sig_swf(self, file_contents): + if file_contents[1:3] != b'WS': + raise ExtractorError( + u'Not an SWF file; header is %r' % file_contents[:3]) + if file_contents[:1] == b'C': + content = zlib.decompress(file_contents[8:]) + else: + raise NotImplementedError(u'Unsupported compression format %r' % + file_contents[:1]) + + def extract_tags(content): + pos = 0 + while pos < len(content): + header16 = struct.unpack('> 6 + tag_len = header16 & 0x3f + if tag_len == 0x3f: + tag_len = struct.unpack('> 4 + methods = {} + if kind in [0x00, 0x06]: # Slot or Const + _, pos = u30(pos=pos) # Slot id + type_name_idx, pos = u30(pos=pos) + vindex, pos = u30(pos=pos) + if vindex != 0: + _, pos = read_byte(pos=pos) # vkind + elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter + _, pos = u30(pos=pos) # disp_id + method_idx, pos = u30(pos=pos) + methods[multinames[trait_name_idx]] = method_idx + elif kind == 0x04: # Class + _, pos = u30(pos=pos) # slot_id + _, pos = u30(pos=pos) # classi + elif kind == 0x05: # Function + _, pos = u30(pos=pos) # slot_id + function_idx, pos = u30(pos=pos) + methods[function_idx] = multinames[trait_name_idx] + else: + raise ExtractorError(u'Unsupported trait kind %d' % kind) + + if attrs & 0x4 != 0: # Metadata present + metadata_count, pos = u30(pos=pos) + for _c3 in range(metadata_count): + _, pos = u30(pos=pos) + + return (methods, pos) + + # Classes + TARGET_CLASSNAME = u'SignatureDecipher' + searched_idx = multinames.index(TARGET_CLASSNAME) + searched_class_id = None + class_count, p = u30() + for class_id in range(class_count): + name_idx, p = u30() + if name_idx == searched_idx: + # We found the class we're looking for! + searched_class_id = class_id + _, p = u30() # super_name idx + flags, p = read_byte() + if flags & 0x08 != 0: # Protected namespace is present + protected_ns_idx, p = u30() + intrf_count, p = u30() + for _c2 in range(intrf_count): + _, p = u30() + _, p = u30() # iinit + trait_count, p = u30() + for _c2 in range(trait_count): + _, p = parse_traits_info() + + if searched_class_id is None: + raise ExtractorError(u'Target class %r not found' % + TARGET_CLASSNAME) + + method_names = {} + method_idxs = {} + for class_id in range(class_count): + _, p = u30() # cinit + trait_count, p = u30() + for _c2 in range(trait_count): + trait_methods, p = parse_traits_info() + if class_id == searched_class_id: + method_names.update(trait_methods.items()) + method_idxs.update(dict( + (idx, name) + for name, idx in trait_methods.items())) + + # Scripts + script_count, p = u30() + for _c in range(script_count): + _, p = u30() # init + trait_count, p = u30() + for _c2 in range(trait_count): + _, p = parse_traits_info() + + # Method bodies + method_body_count, p = u30() + Method = collections.namedtuple('Method', ['code', 'local_count']) + methods = {} + for _c in range(method_body_count): + method_idx, p = u30() + max_stack, p = u30() + local_count, p = u30() + init_scope_depth, p = u30() + max_scope_depth, p = u30() + code_length, p = u30() + if method_idx in method_idxs: + m = Method(code_tag[p:p+code_length], local_count) + methods[method_idxs[method_idx]] = m + p += code_length + exception_count, p = u30() + for _c2 in range(exception_count): + _, p = u30() # from + _, p = u30() # to + _, p = u30() # target + _, p = u30() # exc_type + _, p = u30() # var_name + trait_count, p = u30() + for _c2 in range(trait_count): + _, p = parse_traits_info() + + assert p == len(code_tag) + assert len(methods) == len(method_idxs) + + method_pyfunctions = {} + + def extract_function(func_name): + if func_name in method_pyfunctions: + return method_pyfunctions[func_name] + if func_name not in methods: + raise ExtractorError(u'Cannot find function %r' % func_name) + m = methods[func_name] + + def resfunc(args): + print('Entering function %s(%r)' % (func_name, args)) + registers = ['(this)'] + list(args) + [None] * m.local_count + stack = [] + coder = io.BytesIO(m.code) + while True: + opcode = struct.unpack('!B', coder.read(1))[0] + if opcode == 208: # getlocal_0 + stack.append(registers[0]) + elif opcode == 209: # getlocal_1 + stack.append(registers[1]) + elif opcode == 210: # getlocal_2 + stack.append(registers[2]) + elif opcode == 36: # pushbyte + v = struct.unpack('!B', coder.read(1))[0] + stack.append(v) + elif opcode == 44: # pushstring + idx = u30(coder) + stack.append(constant_strings[idx]) + elif opcode == 48: # pushscope + # We don't implement the scope register, so we'll just + # ignore the popped value + stack.pop() + elif opcode == 70: # callproperty + index = u30(coder) + mname = multinames[index] + arg_count = u30(coder) + args = list(reversed( + [stack.pop() for _ in range(arg_count)])) + obj = stack.pop() + if mname == u'split': + assert len(args) == 1 + assert isinstance(args[0], compat_str) + assert isinstance(obj, compat_str) + if args[0] == u'': + res = list(obj) + else: + res = obj.split(args[0]) + stack.append(res) + elif mname in method_pyfunctions: + stack.append(method_pyfunctions[mname](args)) + else: + raise NotImplementedError( + u'Unsupported property %r on %r' + % (mname, obj)) + elif opcode == 93: # findpropstrict + index = u30(coder) + mname = multinames[index] + res = extract_function(mname) + stack.append(res) + elif opcode == 97: # setproperty + index = u30(coder) + value = stack.pop() + idx = stack.pop() + obj = stack.pop() + assert isinstance(obj, list) + assert isinstance(idx, int) + obj[idx] = value + elif opcode == 98: # getlocal + index = u30(coder) + stack.append(registers[index]) + elif opcode == 99: # setlocal + index = u30(coder) + value = stack.pop() + registers[index] = value + elif opcode == 102: # getproperty + index = u30(coder) + pname = multinames[index] + if pname == u'length': + obj = stack.pop() + assert isinstance(obj, list) + stack.append(len(obj)) + else: # Assume attribute access + idx = stack.pop() + assert isinstance(idx, int) + obj = stack.pop() + assert isinstance(obj, list) + stack.append(obj[idx]) + elif opcode == 128: # coerce + _ = u30(coder) + elif opcode == 133: # coerce_s + assert isinstance(stack[-1], (type(None), compat_str)) + elif opcode == 164: # modulo + value2 = stack.pop() + value1 = stack.pop() + res = value1 % value2 + stack.append(res) + elif opcode == 214: # setlocal_2 + registers[2] = stack.pop() + elif opcode == 215: # setlocal_3 + registers[3] = stack.pop() + else: + raise NotImplementedError( + u'Unsupported opcode %d' % opcode) + + method_pyfunctions[func_name] = resfunc + return resfunc + + initial_function = extract_function(u'decipher') + return lambda s: initial_function([s]) + + def _decrypt_signature(self, s, video_id, jsplayer_url, age_gate=False): """Turn the encrypted s field into a working signature""" - if len(s) == 93: - return s[86:29:-1] + s[88] + s[28:5:-1] - elif len(s) == 92: + if jsplayer_url is not None: + try: + if jsplayer_url not in self._jsplayer_cache: + self._jsplayer_cache[jsplayer_url] = self._extract_signature_function( + video_id, jsplayer_url + ) + return self._jsplayer_cache[jsplayer_url]([s]) + except Exception as e: + tb = traceback.format_exc() + self._downloader.report_warning(u'Automatic signature extraction failed: ' + tb) + + self._downloader.report_warning(u'Warning: Falling back to static signature algorithm') + + if age_gate: + # The videos with age protection use another player, so the + # algorithms can be different. + if len(s) == 86: + return s[2:63] + s[82] + s[64:82] + s[63] + + if len(s) == 92: return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] - elif len(s) == 91: - return s[84:27:-1] + s[86] + s[26:5:-1] elif len(s) == 90: return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] elif len(s) == 89: @@ -631,7 +1190,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): video_webpage = video_webpage_bytes.decode('utf-8', 'ignore') # Attempt to extract SWF player URL - mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage) + mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage) if mobj is not None: player_url = re.sub(r'\\(.)', r'\1', mobj.group(1)) else: @@ -784,21 +1343,31 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if 'sig' in url_data: url += '&signature=' + url_data['sig'][0] elif 's' in url_data: + encrypted_sig = url_data['s'][0] if self._downloader.params.get('verbose'): - s = url_data['s'][0] if age_gate: - player = 'flash player' + player_version = self._search_regex(r'-(.+)\.swf$', + player_url if player_url else 'NOT FOUND', + 'flash player', fatal=False) + player_desc = 'flash player %s' % player_version else: - player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage, + player_version = self._search_regex(r'html5player-(.+?)\.js', video_webpage, 'html5 player', fatal=False) - parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.')) + player_desc = u'html5 player %s' % player_version + + parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.')) self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' % - (len(s), parts_sizes, url_data['itag'][0], player)) - encrypted_sig = url_data['s'][0] + (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc)) + if age_gate: - signature = self._decrypt_signature_age_gate(encrypted_sig) + jsplayer_url = None else: - signature = self._decrypt_signature(encrypted_sig) + jsplayer_url_json = self._search_regex( + r'"assets":.+?"js":\s*("[^"]+")', + video_webpage, u'JS player URL') + jsplayer_url = json.loads(jsplayer_url_json) + + signature = self._decrypt_signature(encrypted_sig, video_id, jsplayer_url, age_gate) url += '&signature=' + signature if 'ratebypass' not in url: url += '&ratebypass=yes' diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 814a9b6be..201ed255d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -66,6 +66,12 @@ try: except ImportError: # Python 2 from urllib2 import HTTPError as compat_HTTPError +try: + from urllib.request import urlretrieve as compat_urlretrieve +except ImportError: # Python 2 + from urllib import urlretrieve as compat_urlretrieve + + try: from subprocess import DEVNULL compat_subprocess_get_DEVNULL = lambda: DEVNULL From a7177865b19cdf711f15e01541aee9deae97a56c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 21 Sep 2013 14:48:12 +0200 Subject: [PATCH 051/103] Implement more opcodes --- youtube_dl/extractor/youtube.py | 45 ++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 456d3cb0f..b57693ee6 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -863,13 +863,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): coder = io.BytesIO(m.code) while True: opcode = struct.unpack('!B', coder.read(1))[0] - if opcode == 208: # getlocal_0 - stack.append(registers[0]) - elif opcode == 209: # getlocal_1 - stack.append(registers[1]) - elif opcode == 210: # getlocal_2 - stack.append(registers[2]) - elif opcode == 36: # pushbyte + if opcode == 36: # pushbyte v = struct.unpack('!B', coder.read(1))[0] stack.append(v) elif opcode == 44: # pushstring @@ -895,12 +889,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): else: res = obj.split(args[0]) stack.append(res) + elif mname == u'slice': + assert len(args) == 1 + assert isinstance(args[0], int) + assert isinstance(obj, list) + res = obj[args[0]:] + stack.append(res) + elif mname == u'join': + assert len(args) == 1 + assert isinstance(args[0], compat_str) + assert isinstance(obj, list) + res = args[0].join(obj) + stack.append(res) elif mname in method_pyfunctions: stack.append(method_pyfunctions[mname](args)) else: raise NotImplementedError( u'Unsupported property %r on %r' % (mname, obj)) + elif opcode == 72: # returnvalue + res = stack.pop() + return res + elif opcode == 79: # callpropvoid + index = u30(coder) + mname = multinames[index] + arg_count = u30(coder) + args = list(reversed( + [stack.pop() for _ in range(arg_count)])) + obj = stack.pop() + if mname == u'reverse': + assert isinstance(obj, list) + obj.reverse() + else: + raise NotImplementedError( + u'Unsupported (void) property %r on %r' + % (mname, obj)) elif opcode == 93: # findpropstrict index = u30(coder) mname = multinames[index] @@ -943,6 +966,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): value1 = stack.pop() res = value1 % value2 stack.append(res) + elif opcode == 208: # getlocal_0 + stack.append(registers[0]) + elif opcode == 209: # getlocal_1 + stack.append(registers[1]) + elif opcode == 210: # getlocal_2 + stack.append(registers[2]) + elif opcode == 211: # getlocal_3 + stack.append(registers[3]) elif opcode == 214: # setlocal_2 registers[2] = stack.pop() elif opcode == 215: # setlocal_3 From 95dbd2f9907416e86424e4372dbd2593c1699e7d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 21 Sep 2013 15:10:38 +0200 Subject: [PATCH 052/103] Change test target (Verified with node.js) --- test/test_youtube_signature.py | 2 +- youtube_dl/extractor/youtube.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 2c06caef4..36533cf1f 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -30,7 +30,7 @@ _TESTS = [ u'https://s.ytimg.com/yts/swfbin/watch_as3-vflg5GhxU.swf', u'swf', 82, - u'23456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!?#$%&\'()*+,-./:;<=>"' + u':/.-,+*)=\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBAzyxw>utsrqponmlkjihgfedcba987654321' ), ] diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b57693ee6..45b593a12 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -857,7 +857,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): m = methods[func_name] def resfunc(args): - print('Entering function %s(%r)' % (func_name, args)) registers = ['(this)'] + list(args) + [None] * m.local_count stack = [] coder = io.BytesIO(m.code) From 8379969834b787708ef5574dc447028c1caf295b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 21 Sep 2013 15:19:48 +0200 Subject: [PATCH 053/103] Prepare signature function caching --- youtube_dl/extractor/youtube.py | 57 ++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 45b593a12..2cd2fdce3 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -400,7 +400,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def __init__(self, *args, **kwargs): super(YoutubeIE, self).__init__(*args, **kwargs) - self._jsplayer_cache = {} + self._player_cache = {} def report_video_webpage_download(self, video_id): """Report attempt to download video webpage.""" @@ -423,26 +423,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): self.to_screen(u'RTMP download detected') def _extract_signature_function(self, video_id, player_url): - id_m = re.match(r'.*-(?P[^.]+)\.(?P[^.]+)$', player_url) + id_m = re.match(r'.*-(?P[a-zA-Z0-9]+)\.(?P[a-z]+)$', + player_url) player_type = id_m.group('ext') player_id = id_m.group('id') + # TODO read from filesystem cache + if player_type == 'js': code = self._download_webpage( player_url, video_id, - note=u'Downloading %s player %s' % (player_type, jsplayer_id), + note=u'Downloading %s player %s' % (player_type, player_id), errnote=u'Download of %s failed' % player_url) - return self._parse_sig_js(code) + res = self._parse_sig_js(code) elif player_tpye == 'swf': urlh = self._request_webpage( player_url, video_id, - note=u'Downloading %s player %s' % (player_type, jsplayer_id), + note=u'Downloading %s player %s' % (player_type, player_id), errnote=u'Download of %s failed' % player_url) code = urlh.read() - return self._parse_sig_swf(code) + res = self._parse_sig_swf(code) else: assert False, 'Invalid player type %r' % player_type + # TODO write cache + + return res + def _parse_sig_js(self, jscode): funcname = self._search_regex( r'signature=([a-zA-Z]+)', jscode, @@ -987,22 +994,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): initial_function = extract_function(u'decipher') return lambda s: initial_function([s]) - def _decrypt_signature(self, s, video_id, jsplayer_url, age_gate=False): + def _decrypt_signature(self, s, video_id, player_url, age_gate=False): """Turn the encrypted s field into a working signature""" - if jsplayer_url is not None: + if player_url is not None: try: - if jsplayer_url not in self._jsplayer_cache: - self._jsplayer_cache[jsplayer_url] = self._extract_signature_function( - video_id, jsplayer_url + if player_url not in self._player_cache: + func = self._extract_signature_function( + video_id, player_url ) - return self._jsplayer_cache[jsplayer_url]([s]) + self._player_cache[player_url] = func + return self._player_cache[player_url](s) except Exception as e: tb = traceback.format_exc() - self._downloader.report_warning(u'Automatic signature extraction failed: ' + tb) + self._downloader.report_warning( + u'Automatic signature extraction failed: ' + tb) - self._downloader.report_warning(u'Warning: Falling back to static signature algorithm') + self._downloader.report_warning( + u'Warning: Falling back to static signature algorithm') + return self._static_decrypt_signature(s) + def _static_decrypt_signature(self, s): if age_gate: # The videos with age protection use another player, so the # algorithms can be different. @@ -1376,12 +1388,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): encrypted_sig = url_data['s'][0] if self._downloader.params.get('verbose'): if age_gate: - player_version = self._search_regex(r'-(.+)\.swf$', - player_url if player_url else 'NOT FOUND', + player_version = self._search_regex( + r'-(.+)\.swf$', + player_url if player_url else None, 'flash player', fatal=False) player_desc = 'flash player %s' % player_version else: - player_version = self._search_regex(r'html5player-(.+?)\.js', video_webpage, + player_version = self._search_regex( + r'html5player-(.+?)\.js', video_webpage, 'html5 player', fatal=False) player_desc = u'html5 player %s' % player_version @@ -1389,15 +1403,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' % (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc)) - if age_gate: - jsplayer_url = None - else: + if not age_gate: jsplayer_url_json = self._search_regex( r'"assets":.+?"js":\s*("[^"]+")', video_webpage, u'JS player URL') - jsplayer_url = json.loads(jsplayer_url_json) + player_url = json.loads(jsplayer_url_json) - signature = self._decrypt_signature(encrypted_sig, video_id, jsplayer_url, age_gate) + signature = self._decrypt_signature( + encrypted_sig, video_id, player_url, age_gate) url += '&signature=' + signature if 'ratebypass' not in url: url += '&ratebypass=yes' From ba552f542f674d35de21d48978f211b8db3f0ff8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 21 Sep 2013 15:32:37 +0200 Subject: [PATCH 054/103] Use reader instead of indexing --- youtube_dl/extractor/youtube.py | 262 +++++++++++++++----------------- 1 file changed, 122 insertions(+), 140 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2cd2fdce3..09bd423f5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -590,99 +590,83 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): for tag_code, tag in extract_tags(content) if tag_code == 82) p = code_tag.index(b'\0', 4) + 1 + code_reader = io.BytesIO(code_tag[p:]) # Parse ABC (AVM2 ByteCode) - def read_int(data=None, pos=None): - if hasattr(data, 'read'): - assert pos is None - - res = 0 - shift = 0 - for _ in range(5): - buf = data.read(1) - assert len(buf) == 1 - b = struct.unpack('> 4 methods = {} if kind in [0x00, 0x06]: # Slot or Const - _, pos = u30(pos=pos) # Slot id - type_name_idx, pos = u30(pos=pos) - vindex, pos = u30(pos=pos) + _ = u30() # Slot id + type_name_idx = u30() + vindex = u30() if vindex != 0: - _, pos = read_byte(pos=pos) # vkind + _ = read_byte() # vkind elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter - _, pos = u30(pos=pos) # disp_id - method_idx, pos = u30(pos=pos) + _ = u30() # disp_id + method_idx = u30() methods[multinames[trait_name_idx]] = method_idx elif kind == 0x04: # Class - _, pos = u30(pos=pos) # slot_id - _, pos = u30(pos=pos) # classi + _ = u30() # slot_id + _ = u30() # classi elif kind == 0x05: # Function - _, pos = u30(pos=pos) # slot_id - function_idx, pos = u30(pos=pos) + _ = u30() # slot_id + function_idx = u30() methods[function_idx] = multinames[trait_name_idx] else: raise ExtractorError(u'Unsupported trait kind %d' % kind) if attrs & 0x4 != 0: # Metadata present - metadata_count, pos = u30(pos=pos) + metadata_count = u30() for _c3 in range(metadata_count): - _, pos = u30(pos=pos) + _ = u30() - return (methods, pos) + return methods # Classes TARGET_CLASSNAME = u'SignatureDecipher' searched_idx = multinames.index(TARGET_CLASSNAME) searched_class_id = None - class_count, p = u30() + class_count = u30() for class_id in range(class_count): - name_idx, p = u30() + name_idx = u30() if name_idx == searched_idx: # We found the class we're looking for! searched_class_id = class_id - _, p = u30() # super_name idx - flags, p = read_byte() + _ = u30() # super_name idx + flags = read_byte() if flags & 0x08 != 0: # Protected namespace is present - protected_ns_idx, p = u30() - intrf_count, p = u30() + protected_ns_idx = u30() + intrf_count = u30() for _c2 in range(intrf_count): - _, p = u30() - _, p = u30() # iinit - trait_count, p = u30() + _ = u30() + _ = u30() # iinit + trait_count = u30() for _c2 in range(trait_count): - _, p = parse_traits_info() + _ = parse_traits_info() if searched_class_id is None: raise ExtractorError(u'Target class %r not found' % @@ -807,10 +789,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): method_names = {} method_idxs = {} for class_id in range(class_count): - _, p = u30() # cinit - trait_count, p = u30() + _ = u30() # cinit + trait_count = u30() for _c2 in range(trait_count): - trait_methods, p = parse_traits_info() + trait_methods = parse_traits_info() if class_id == searched_class_id: method_names.update(trait_methods.items()) method_idxs.update(dict( @@ -818,40 +800,40 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): for name, idx in trait_methods.items())) # Scripts - script_count, p = u30() + script_count = u30() for _c in range(script_count): - _, p = u30() # init - trait_count, p = u30() + _ = u30() # init + trait_count = u30() for _c2 in range(trait_count): - _, p = parse_traits_info() + _ = parse_traits_info() # Method bodies - method_body_count, p = u30() + method_body_count = u30() Method = collections.namedtuple('Method', ['code', 'local_count']) methods = {} for _c in range(method_body_count): - method_idx, p = u30() - max_stack, p = u30() - local_count, p = u30() - init_scope_depth, p = u30() - max_scope_depth, p = u30() - code_length, p = u30() + method_idx = u30() + max_stack = u30() + local_count = u30() + init_scope_depth = u30() + max_scope_depth = u30() + code_length = u30() + code = read_bytes(code_length) if method_idx in method_idxs: - m = Method(code_tag[p:p+code_length], local_count) + m = Method(code, local_count) methods[method_idxs[method_idx]] = m - p += code_length - exception_count, p = u30() + exception_count = u30() for _c2 in range(exception_count): - _, p = u30() # from - _, p = u30() # to - _, p = u30() # target - _, p = u30() # exc_type - _, p = u30() # var_name - trait_count, p = u30() + _ = u30() # from + _ = u30() # to + _ = u30() # target + _ = u30() # exc_type + _ = u30() # var_name + trait_count = u30() for _c2 in range(trait_count): - _, p = parse_traits_info() + _ = parse_traits_info() - assert p == len(code_tag) + assert p + code_reader.tell() == len(code_tag) assert len(methods) == len(method_idxs) method_pyfunctions = {} From 2f2ffea9cad7d30165a0171bf6e662bef2182ab4 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 21 Sep 2013 15:34:29 +0200 Subject: [PATCH 055/103] Clarify a couple of calls --- youtube_dl/extractor/youtube.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 09bd423f5..5c0ea2e43 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -641,7 +641,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): return res # minor_version + major_version - _ = read_bytes(4) + _ = read_bytes(2 + 2) # Constant pool int_count = u30() @@ -994,9 +994,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): self._downloader.report_warning( u'Warning: Falling back to static signature algorithm') - return self._static_decrypt_signature(s) + return self._static_decrypt_signature( + s, video_id, player_url, age_gate) - def _static_decrypt_signature(self, s): + def _static_decrypt_signature(self, s, video_id, player_url, age_gate): if age_gate: # The videos with age protection use another player, so the # algorithms can be different. From 4a2080e4077e9e12c860d82a4d2eebc75c1ea54b Mon Sep 17 00:00:00 2001 From: tewe Date: Sun, 15 Sep 2013 21:58:49 +0200 Subject: [PATCH 056/103] [youku] better error handling blocked videos used to cause death by TypeError, now we report what the server says --- youtube_dl/extractor/youku.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 996d38478..00fa2ccb5 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -66,6 +66,12 @@ class YoukuIE(InfoExtractor): self.report_extraction(video_id) try: config = json.loads(jsondata) + error_code = config['data'][0].get('error_code') + if error_code: + # -8 means blocked outside China. + error = config['data'][0].get('error') # Chinese and English, separated by newline. + raise ExtractorError(error or u'Server reported error %i' % error_code, + expected=True) video_title = config['data'][0]['title'] seed = config['data'][0]['seed'] @@ -89,6 +95,7 @@ class YoukuIE(InfoExtractor): fileid = config['data'][0]['streamfileids'][format] keys = [s['k'] for s in config['data'][0]['segs'][format]] + # segs is usually a dictionary, but an empty *list* if an error occured. except (UnicodeDecodeError, ValueError, KeyError): raise ExtractorError(u'Unable to extract info section') From c4417ddb611e14b81fe56b6b32964c5802faf554 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 22 Sep 2013 00:35:03 +0200 Subject: [PATCH 057/103] [youtube] Add filesystem signature cache --- youtube_dl/FileDownloader.py | 2 ++ youtube_dl/extractor/youtube.py | 35 ++++++++++++++++++++++++++------- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 0b5a5d77d..1eb71a80e 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -39,6 +39,8 @@ class FileDownloader(object): test: Download only first bytes to test the downloader. min_filesize: Skip files smaller than this size max_filesize: Skip files larger than this size + cachedir: Location of the cache files in the filesystem. + False to disable filesystem cache. """ params = None diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5c0ea2e43..63f59ae8f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -4,8 +4,10 @@ import collections import itertools import io import json -import netrc +import operator +import os.path import re +import shutil import socket import string import struct @@ -422,13 +424,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): """Indicate the download will use the RTMP protocol.""" self.to_screen(u'RTMP download detected') - def _extract_signature_function(self, video_id, player_url): - id_m = re.match(r'.*-(?P[a-zA-Z0-9]+)\.(?P[a-z]+)$', + def _extract_signature_function(self, video_id, player_url, slen): + id_m = re.match(r'.*-(?P[a-zA-Z0-9_-]+)\.(?P[a-z]+)$', player_url) player_type = id_m.group('ext') player_id = id_m.group('id') - # TODO read from filesystem cache + # Read from filesystem cache + func_id = '%s_%s_%d' % (player_type, player_id, slen) + assert os.path.basename(func_id) == func_id + cache_dir = self.downloader.params.get('cachedir', + u'~/.youtube-dl/cache') + + if cache_dir is not False: + cache_fn = os.path.join(os.path.expanduser(cache_dir), + u'youtube-sigfuncs', + func_id + '.json') + try: + with io.open(cache_fn, '', encoding='utf-8') as cachef: + cache_spec = json.load(cachef) + return lambda s: u''.join(s[i] for i in cache_spec) + except OSError: + pass # No cache available if player_type == 'js': code = self._download_webpage( @@ -436,7 +453,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): note=u'Downloading %s player %s' % (player_type, player_id), errnote=u'Download of %s failed' % player_url) res = self._parse_sig_js(code) - elif player_tpye == 'swf': + elif player_type == 'swf': urlh = self._request_webpage( player_url, video_id, note=u'Downloading %s player %s' % (player_type, player_id), @@ -446,7 +463,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): else: assert False, 'Invalid player type %r' % player_type - # TODO write cache + if cache_dir is not False: + cache_res = res(map(compat_chr, range(slen))) + cache_spec = [ord(c) for c in cache_res] + shutil.makedirs(os.path.dirname(cache_fn)) + write_json_file(cache_spec, cache_fn) return res @@ -983,7 +1004,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): try: if player_url not in self._player_cache: func = self._extract_signature_function( - video_id, player_url + video_id, player_url, len(s) ) self._player_cache[player_url] = func return self._player_cache[player_url](s) From edf3e38ebd6c5db21585dc7b6384e325e6cfb540 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 22 Sep 2013 10:30:02 +0200 Subject: [PATCH 058/103] [youtube] Improve cache and add an option to print the extracted signatures --- youtube_dl/FileDownloader.py | 2 +- youtube_dl/__init__.py | 6 +++ youtube_dl/extractor/youtube.py | 69 +++++++++++++++++++++++++++------ 3 files changed, 65 insertions(+), 12 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 1eb71a80e..604714134 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -40,7 +40,7 @@ class FileDownloader(object): min_filesize: Skip files smaller than this size max_filesize: Skip files larger than this size cachedir: Location of the cache files in the filesystem. - False to disable filesystem cache. + "NONE" to disable filesystem cache. """ params = None diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 1ed30aae3..072f69f2e 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -167,6 +167,7 @@ def parseOpts(overrideArguments=None): help='Output descriptions of all supported extractors', default=False) general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') + general.add_option('--cache-dir', dest='cachedir', default=u'~/.youtube-dl/cache', help='Location in the filesystem where youtube-dl can store downloaded information permanently. NONE to disable filesystem caching, %default by default') selection.add_option('--playlist-start', @@ -272,6 +273,10 @@ def parseOpts(overrideArguments=None): verbosity.add_option('--dump-intermediate-pages', action='store_true', dest='dump_intermediate_pages', default=False, help='print downloaded pages to debug problems(very verbose)') + verbosity.add_option('--youtube-print-sig-code', + action='store_true', dest='youtube_print_sig_code', default=False, + help=optparse.SUPPRESS_HELP) + filesystem.add_option('-t', '--title', action='store_true', dest='usetitle', help='use title in file name (default)', default=False) @@ -613,6 +618,7 @@ def _real_main(argv=None): 'min_filesize': opts.min_filesize, 'max_filesize': opts.max_filesize, 'daterange': date, + 'youtube_print_sig_code': opts.youtube_print_sig_code }) if opts.verbose: diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 63f59ae8f..4200f987e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1,13 +1,13 @@ # coding: utf-8 import collections +import errno import itertools import io import json import operator import os.path import re -import shutil import socket import string import struct @@ -17,6 +17,7 @@ import zlib from .common import InfoExtractor, SearchInfoExtractor from .subtitles import SubtitlesInfoExtractor from ..utils import ( + compat_chr, compat_http_client, compat_parse_qs, compat_urllib_error, @@ -30,6 +31,7 @@ from ..utils import ( unescapeHTML, unified_strdate, orderedSet, + write_json_file, ) class YoutubeBaseInfoExtractor(InfoExtractor): @@ -433,18 +435,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Read from filesystem cache func_id = '%s_%s_%d' % (player_type, player_id, slen) assert os.path.basename(func_id) == func_id - cache_dir = self.downloader.params.get('cachedir', - u'~/.youtube-dl/cache') + cache_dir = self._downloader.params.get('cachedir', + u'~/.youtube-dl/cache') - if cache_dir is not False: + if cache_dir != u'NONE': cache_fn = os.path.join(os.path.expanduser(cache_dir), u'youtube-sigfuncs', func_id + '.json') try: - with io.open(cache_fn, '', encoding='utf-8') as cachef: + with io.open(cache_fn, 'r', encoding='utf-8') as cachef: cache_spec = json.load(cachef) return lambda s: u''.join(s[i] for i in cache_spec) - except OSError: + except IOError: pass # No cache available if player_type == 'js': @@ -464,13 +466,55 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): assert False, 'Invalid player type %r' % player_type if cache_dir is not False: - cache_res = res(map(compat_chr, range(slen))) - cache_spec = [ord(c) for c in cache_res] - shutil.makedirs(os.path.dirname(cache_fn)) - write_json_file(cache_spec, cache_fn) + try: + cache_res = res(map(compat_chr, range(slen))) + cache_spec = [ord(c) for c in cache_res] + try: + os.makedirs(os.path.dirname(cache_fn)) + except OSError as ose: + if ose.errno != errno.EEXIST: + raise + write_json_file(cache_spec, cache_fn) + except Exception as e: + tb = traceback.format_exc() + self._downloader.report_warning( + u'Writing cache to %r failed: %s' % (cache_fn, tb)) return res + def _print_sig_code(self, func, slen): + def gen_sig_code(idxs): + def _genslice(start, end, step): + starts = u'' if start == 0 else str(start) + ends = u':%d' % (end+step) + steps = u'' if step == 1 else (':%d' % step) + return u's[%s%s%s]' % (starts, ends, steps) + + step = None + for i, prev in zip(idxs[1:], idxs[:-1]): + if step is not None: + if i - prev == step: + continue + yield _genslice(start, prev, step) + step = None + continue + if i - prev in [-1, 1]: + step = i - prev + start = prev + continue + else: + yield u's[%d]' % prev + if step is None: + yield u's[%d]' % i + else: + yield _genslice(start, i, step) + + cache_res = func(map(compat_chr, range(slen))) + cache_spec = [ord(c) for c in cache_res] + expr_code = u' + '.join(gen_sig_code(cache_spec)) + code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code) + self.to_screen(u'Extracted signature:\n' + code) + def _parse_sig_js(self, jscode): funcname = self._search_regex( r'signature=([a-zA-Z]+)', jscode, @@ -1007,7 +1051,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): video_id, player_url, len(s) ) self._player_cache[player_url] = func - return self._player_cache[player_url](s) + func = self._player_cache[player_url] + if self._downloader.params.get('youtube_print_sig_code'): + self._print_sig_code(func, len(s)) + return func(s) except Exception as e: tb = traceback.format_exc() self._downloader.report_warning( From 4ba146f35dd797e9d78636cb3cffabb100575240 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 22 Sep 2013 10:31:25 +0200 Subject: [PATCH 059/103] Update static signatures --- youtube_dl/extractor/youtube.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4200f987e..8245349b2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1072,8 +1072,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if len(s) == 86: return s[2:63] + s[82] + s[64:82] + s[63] - if len(s) == 92: + if len(s) == 93: + return s[86:29:-1] + s[88] + s[28:5:-1] + elif len(s) == 92: return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] + elif len(s) == 91: + return s[84:27:-1] + s[86] + s[26:5:-1] elif len(s) == 90: return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] elif len(s) == 89: From 0ca96d48c7f74e122be70b71bb5fe38f4b143cb0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 22 Sep 2013 10:37:23 +0200 Subject: [PATCH 060/103] [youtube] Improve source code quality --- youtube_dl/extractor/youtube.py | 104 ++++++++++++++++---------------- 1 file changed, 53 insertions(+), 51 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8245349b2..a9bfc455f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2,16 +2,16 @@ import collections import errno -import itertools import io +import itertools import json -import operator import os.path import re import socket import string import struct import traceback +import xml.etree.ElementTree import zlib from .common import InfoExtractor, SearchInfoExtractor @@ -475,7 +475,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if ose.errno != errno.EEXIST: raise write_json_file(cache_spec, cache_fn) - except Exception as e: + except Exception: tb = traceback.format_exc() self._downloader.report_warning( u'Writing cache to %r failed: %s' % (cache_fn, tb)) @@ -491,6 +491,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): return u's[%s%s%s]' % (starts, ends, steps) step = None + start = '(Never used)' # Quelch pyflakes warnings - start will be + # set as soon as step is set for i, prev in zip(idxs[1:], idxs[:-1]): if step is not None: if i - prev == step: @@ -527,7 +529,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def interpret_statement(stmt, local_vars, allow_recursion=20): if allow_recursion < 0: - raise ExctractorError(u'Recursion limit reached') + raise ExtractorError(u'Recursion limit reached') if stmt.startswith(u'var '): stmt = stmt[len(u'var '):] @@ -685,7 +687,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): v = - ((v ^ 0xffffffff) + 1) return v - def string(reader=None): + def read_string(reader=None): if reader is None: reader = code_reader slen = u30(reader) @@ -706,31 +708,31 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): return res # minor_version + major_version - _ = read_bytes(2 + 2) + read_bytes(2 + 2) # Constant pool int_count = u30() for _c in range(1, int_count): - _ = s32() + s32() uint_count = u30() for _c in range(1, uint_count): - _ = u32() + u32() double_count = u30() - _ = read_bytes((double_count-1) * 8) + read_bytes((double_count-1) * 8) string_count = u30() constant_strings = [u''] for _c in range(1, string_count): - s = string() + s = read_string() constant_strings.append(s) namespace_count = u30() for _c in range(1, namespace_count): - _ = read_bytes(1) # kind - _ = u30() # name + read_bytes(1) # kind + u30() # name ns_set_count = u30() for _c in range(1, ns_set_count): count = u30() for _c2 in range(count): - _ = u30() + u30() multiname_count = u30() MULTINAME_SIZES = { 0x07: 2, # QName @@ -749,13 +751,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): kind = u30() assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind if kind == 0x07: - namespace_idx = u30() + u30() # namespace_idx name_idx = u30() multinames.append(constant_strings[name_idx]) else: multinames.append('[MULTINAME kind: %d]' % kind) for _c2 in range(MULTINAME_SIZES[kind]): - _ = u30() + u30() # Methods method_count = u30() @@ -765,32 +767,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): method_infos = [] for method_id in range(method_count): param_count = u30() - _ = u30() # return type + u30() # return type for _ in range(param_count): - _ = u30() # param type - _ = u30() # name index (always 0 for youtube) + u30() # param type + u30() # name index (always 0 for youtube) flags = read_byte() if flags & 0x08 != 0: # Options present option_count = u30() for c in range(option_count): - _ = u30() # val - _ = read_bytes(1) # kind + u30() # val + read_bytes(1) # kind if flags & 0x80 != 0: # Param names present for _ in range(param_count): - _ = u30() # param name + u30() # param name mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0) method_infos.append(mi) # Metadata metadata_count = u30() for _c in range(metadata_count): - _ = u30() # name + u30() # name item_count = u30() for _c2 in range(item_count): - _ = u30() # key - _ = u30() # value + u30() # key + u30() # value def parse_traits_info(): trait_name_idx = u30() @@ -799,20 +801,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): attrs = kind_full >> 4 methods = {} if kind in [0x00, 0x06]: # Slot or Const - _ = u30() # Slot id - type_name_idx = u30() + u30() # Slot id + u30() # type_name_idx vindex = u30() if vindex != 0: - _ = read_byte() # vkind + read_byte() # vkind elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter - _ = u30() # disp_id + u30() # disp_id method_idx = u30() methods[multinames[trait_name_idx]] = method_idx elif kind == 0x04: # Class - _ = u30() # slot_id - _ = u30() # classi + u30() # slot_id + u30() # classi elif kind == 0x05: # Function - _ = u30() # slot_id + u30() # slot_id function_idx = u30() methods[function_idx] = multinames[trait_name_idx] else: @@ -821,7 +823,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if attrs & 0x4 != 0: # Metadata present metadata_count = u30() for _c3 in range(metadata_count): - _ = u30() + u30() # metadata index return methods @@ -835,17 +837,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if name_idx == searched_idx: # We found the class we're looking for! searched_class_id = class_id - _ = u30() # super_name idx + u30() # super_name idx flags = read_byte() if flags & 0x08 != 0: # Protected namespace is present - protected_ns_idx = u30() + u30() # protected_ns_idx intrf_count = u30() for _c2 in range(intrf_count): - _ = u30() - _ = u30() # iinit + u30() + u30() # iinit trait_count = u30() for _c2 in range(trait_count): - _ = parse_traits_info() + parse_traits_info() if searched_class_id is None: raise ExtractorError(u'Target class %r not found' % @@ -854,7 +856,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): method_names = {} method_idxs = {} for class_id in range(class_count): - _ = u30() # cinit + u30() # cinit trait_count = u30() for _c2 in range(trait_count): trait_methods = parse_traits_info() @@ -867,10 +869,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Scripts script_count = u30() for _c in range(script_count): - _ = u30() # init + u30() # init trait_count = u30() for _c2 in range(trait_count): - _ = parse_traits_info() + parse_traits_info() # Method bodies method_body_count = u30() @@ -878,10 +880,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): methods = {} for _c in range(method_body_count): method_idx = u30() - max_stack = u30() + u30() # max_stack local_count = u30() - init_scope_depth = u30() - max_scope_depth = u30() + u30() # init_scope_depth + u30() # max_scope_depth code_length = u30() code = read_bytes(code_length) if method_idx in method_idxs: @@ -889,14 +891,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): methods[method_idxs[method_idx]] = m exception_count = u30() for _c2 in range(exception_count): - _ = u30() # from - _ = u30() # to - _ = u30() # target - _ = u30() # exc_type - _ = u30() # var_name + u30() # from + u30() # to + u30() # target + u30() # exc_type + u30() # var_name trait_count = u30() for _c2 in range(trait_count): - _ = parse_traits_info() + parse_traits_info() assert p + code_reader.tell() == len(code_tag) assert len(methods) == len(method_idxs) @@ -1011,7 +1013,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): assert isinstance(obj, list) stack.append(obj[idx]) elif opcode == 128: # coerce - _ = u30(coder) + u30(coder) elif opcode == 133: # coerce_s assert isinstance(stack[-1], (type(None), compat_str)) elif opcode == 164: # modulo @@ -1055,7 +1057,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if self._downloader.params.get('youtube_print_sig_code'): self._print_sig_code(func, len(s)) return func(s) - except Exception as e: + except Exception: tb = traceback.format_exc() self._downloader.report_warning( u'Automatic signature extraction failed: ' + tb) From f8061589e66f12f6c2ffac3d7bfba2a7ac0294d5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 22 Sep 2013 10:50:12 +0200 Subject: [PATCH 061/103] [youtube] Actually pass in cachedir option --- youtube_dl/__init__.py | 3 ++- youtube_dl/extractor/youtube.py | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 072f69f2e..a4769a8ae 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -618,7 +618,8 @@ def _real_main(argv=None): 'min_filesize': opts.min_filesize, 'max_filesize': opts.max_filesize, 'daterange': date, - 'youtube_print_sig_code': opts.youtube_print_sig_code + 'cachedir': opts.cachedir, + 'youtube_print_sig_code': opts.youtube_print_sig_code, }) if opts.verbose: diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a9bfc455f..2dd2db673 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -438,7 +438,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): cache_dir = self._downloader.params.get('cachedir', u'~/.youtube-dl/cache') - if cache_dir != u'NONE': + cache_enabled = cache_dir != u'NONE' + if cache_enabled: cache_fn = os.path.join(os.path.expanduser(cache_dir), u'youtube-sigfuncs', func_id + '.json') @@ -465,7 +466,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): else: assert False, 'Invalid player type %r' % player_type - if cache_dir is not False: + if cache_enabled: try: cache_res = res(map(compat_chr, range(slen))) cache_spec = [ord(c) for c in cache_res] @@ -515,7 +516,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): cache_spec = [ord(c) for c in cache_res] expr_code = u' + '.join(gen_sig_code(cache_spec)) code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code) - self.to_screen(u'Extracted signature:\n' + code) + self.to_screen(u'Extracted signature function:\n' + code) def _parse_sig_js(self, jscode): funcname = self._search_regex( From c35f9e72ce842ecd476bee3767527da0e675dd1a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 22 Sep 2013 11:09:25 +0200 Subject: [PATCH 062/103] Move cachedir doc --- youtube_dl/FileDownloader.py | 2 -- youtube_dl/YoutubeDL.py | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 604714134..0b5a5d77d 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -39,8 +39,6 @@ class FileDownloader(object): test: Download only first bytes to test the downloader. min_filesize: Skip files smaller than this size max_filesize: Skip files larger than this size - cachedir: Location of the cache files in the filesystem. - "NONE" to disable filesystem cache. """ params = None diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index fa24ebe0d..ead1ccb1c 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -81,6 +81,8 @@ class YoutubeDL(object): keepvideo: Keep the video file after post-processing daterange: A DateRange object, download only if the upload_date is in the range. skip_download: Skip the actual download of the video file + cachedir: Location of the cache files in the filesystem. + "NONE" to disable filesystem cache. The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: From 13dc64ce741520ba54ba9fff0ab1a3ac4e5c43a4 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 22 Sep 2013 11:17:21 +0200 Subject: [PATCH 063/103] [youtube] Remove _decrypt_signature_age_gate --- youtube_dl/extractor/youtube.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2dd2db673..56ad33fdc 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1109,15 +1109,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): else: raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) - def _decrypt_signature_age_gate(self, s): - # The videos with age protection use another player, so the algorithms - # can be different. - if len(s) == 86: - return s[2:63] + s[82] + s[64:82] + s[63] - else: - # Fallback to the other algortihms - return self._decrypt_signature(s) - def _get_available_subtitles(self, video_id): try: sub_list = self._download_webpage( From 45f4a76dbc268a56c212fe25cd27922541840cfe Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 22 Sep 2013 11:45:29 +0200 Subject: [PATCH 064/103] Work around nosetests nosiness --- test/test_youtube_signature.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 36533cf1f..5007d9a16 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -43,7 +43,7 @@ class TestSignature(unittest.TestCase): os.mkdir(self.TESTDATA_DIR) -def make_testfunc(url, stype, sig_length, expected_sig): +def make_tfunc(url, stype, sig_length, expected_sig): basename = url.rpartition('/')[2] m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename) assert m, '%r should follow URL format' % basename @@ -73,7 +73,7 @@ def make_testfunc(url, stype, sig_length, expected_sig): setattr(TestSignature, test_func.__name__, test_func) for test_spec in _TESTS: - make_testfunc(*test_spec) + make_tfunc(*test_spec) if __name__ == '__main__': From bdde940e90320e350bd96df621ee7e32641e1eca Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 22 Sep 2013 12:17:42 +0200 Subject: [PATCH 065/103] [youtube] Improve flash player URL handling --- youtube_dl/extractor/youtube.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 56ad33fdc..888907c93 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1437,10 +1437,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): encrypted_sig = url_data['s'][0] if self._downloader.params.get('verbose'): if age_gate: - player_version = self._search_regex( - r'-(.+)\.swf$', - player_url if player_url else None, - 'flash player', fatal=False) + if player_url is None: + player_version = 'unknown' + else: + player_version = self._search_regex( + r'-(.+)\.swf$', player_url, + u'flash player', fatal=False) player_desc = 'flash player %s' % player_version else: player_version = self._search_regex( From d2d8f895310be7fa302ba7755c60d5948866fcaa Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 22 Sep 2013 12:18:10 +0200 Subject: [PATCH 066/103] Do not warn if fallback is without alternatives (because we did not get the flash player URL) --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 888907c93..780690ed0 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1063,8 +1063,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): self._downloader.report_warning( u'Automatic signature extraction failed: ' + tb) - self._downloader.report_warning( - u'Warning: Falling back to static signature algorithm') + self._downloader.report_warning( + u'Warning: Falling back to static signature algorithm') return self._static_decrypt_signature( s, video_id, player_url, age_gate) From c705320f485cd962827fce464a93993569e3173f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 22 Sep 2013 12:18:16 +0200 Subject: [PATCH 067/103] Correct test strings --- youtube_dl/extractor/youtube.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 780690ed0..049da2f91 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -468,7 +468,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if cache_enabled: try: - cache_res = res(map(compat_chr, range(slen))) + test_string = u''.join(map(compat_chr, range(slen))) + cache_res = res(test_string) cache_spec = [ord(c) for c in cache_res] try: os.makedirs(os.path.dirname(cache_fn)) @@ -512,7 +513,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): else: yield _genslice(start, i, step) - cache_res = func(map(compat_chr, range(slen))) + test_string = u''.join(map(compat_chr, range(slen))) + cache_res = func(test_string) cache_spec = [ord(c) for c in cache_res] expr_code = u' + '.join(gen_sig_code(cache_spec)) code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code) From 4ae720042c3959cae856ce93578a0ba4b5817870 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 22 Sep 2013 23:31:39 +0200 Subject: [PATCH 068/103] Include the eta and the speed in the progress hooks Useful when listening to the progress hook, for example in a GUI. --- youtube_dl/FileDownloader.py | 45 ++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 0b5a5d77d..706592988 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -77,26 +77,43 @@ class FileDownloader(object): @staticmethod def calc_percent(byte_counter, data_len): if data_len is None: + return None + return float(byte_counter) / float(data_len) * 100.0 + + @staticmethod + def format_percent(percent): + if percent is None: return '---.-%' - return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0)) + return '%6s' % ('%3.1f%%' % percent) @staticmethod def calc_eta(start, now, total, current): if total is None: - return '--:--' + return None dif = now - start if current == 0 or dif < 0.001: # One millisecond - return '--:--' + return None rate = float(current) / dif - eta = int((float(total) - float(current)) / rate) + return int((float(total) - float(current)) / rate) + + @staticmethod + def format_eta(eta): + if eta is None: + return '--:--' return FileDownloader.format_seconds(eta) @staticmethod def calc_speed(start, now, bytes): dif = now - start if bytes == 0 or dif < 0.001: # One millisecond + return None + return float(bytes) / dif + + @staticmethod + def format_speed(speed): + if speed is None: return '%10s' % '---b/s' - return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif)) + return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed)) @staticmethod def best_block_size(elapsed_time, bytes): @@ -205,11 +222,14 @@ class FileDownloader(object): """Report destination filename.""" self.to_screen(u'[download] Destination: ' + filename) - def report_progress(self, percent_str, data_len_str, speed_str, eta_str): + def report_progress(self, percent, data_len_str, speed, eta): """Report download progress.""" if self.params.get('noprogress', False): return clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'') + eta_str = self.format_eta(eta) + percent_str = self.format_percent(percent) + speed_str = self.format_speed(speed) if self.params.get('progress_with_newline', False): self.to_screen(u'[download] %s of %s at %s ETA %s' % (percent_str, data_len_str, speed_str, eta_str)) @@ -524,13 +544,14 @@ class FileDownloader(object): block_size = self.best_block_size(after - before, len(data_block)) # Progress message - speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len) + speed = self.calc_speed(start, time.time(), byte_counter - resume_len) if data_len is None: self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA') + eta = None else: - percent_str = self.calc_percent(byte_counter, data_len) - eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) - self.report_progress(percent_str, data_len_str, speed_str, eta_str) + percent = self.calc_percent(byte_counter, data_len) + eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) + self.report_progress(percent, data_len_str, speed, eta) self._hook_progress({ 'downloaded_bytes': byte_counter, @@ -538,6 +559,8 @@ class FileDownloader(object): 'tmpfilename': tmpfilename, 'filename': filename, 'status': 'downloading', + 'eta': eta, + 'speed': speed, }) # Apply rate limit @@ -580,6 +603,8 @@ class FileDownloader(object): * downloaded_bytes: Bytes on disks * total_bytes: Total bytes, None if unknown * tmpfilename: The filename we're currently writing to + * eta: The estimated time in seconds, None if unknown + * speed: The download speed in bytes/second, None if unknown Hooks are guaranteed to be called at least once (with status "finished") if the download is successful. From dd5d2eb03c3673cff5a27cc34c0271085002583e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 22 Sep 2013 23:39:30 +0200 Subject: [PATCH 069/103] If the file is already downloaded include the size in the progress hook --- youtube_dl/FileDownloader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 706592988..d6673fd3a 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -398,6 +398,7 @@ class FileDownloader(object): self._hook_progress({ 'filename': filename, 'status': 'finished', + 'total_bytes': os.path.getsize(encodeFilename(filename)), }) return True From 81ec7c7901ddfe9366cf1af010eb31b906dcfce0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 23 Sep 2013 11:24:10 +0200 Subject: [PATCH 070/103] [facebook] Allow untitled videos (Fixes #1484) --- youtube_dl/extractor/facebook.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index beaa5b4bd..9d1bc0751 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -106,8 +106,8 @@ class FacebookIE(InfoExtractor): video_duration = int(video_data['video_duration']) thumbnail = video_data['thumbnail_src'] - video_title = self._html_search_regex('

([^<]+)

', - webpage, u'title') + video_title = self._html_search_regex( + r'

([^<]*)

', webpage, u'title') info = { 'id': video_id, From a825f33030f189a37b1c3517ed1770a8b9e274fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 23 Sep 2013 21:28:33 +0200 Subject: [PATCH 071/103] [francetv] Add an extractor for France2 --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/francetv.py | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 949f59a44..65aacebb3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -42,6 +42,7 @@ from .flickr import FlickrIE from .francetv import ( PluzzIE, FranceTvInfoIE, + France2IE, ) from .freesound import FreesoundIE from .funnyordie import FunnyOrDieIE diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index b8fe82e47..5e915bc03 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -65,3 +65,25 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): webpage = self._download_webpage(url, page_title) video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id') return self._extract_video(video_id) + + +class France2IE(FranceTVBaseInfoExtractor): + IE_NAME = u'france2.fr' + _VALID_URL = r'https?://www\.france2\.fr/emissions/.*?/videos/(?P\d+)' + + _TEST = { + u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104', + u'file': u'75540104.mp4', + u'info_dict': { + u'title': u'13h15, le samedi...', + u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d', + }, + u'params': { + u'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + return self._extract_video(video_id) From 5b333c1ce6287badd89dacdd280a3876a09dcbcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 23 Sep 2013 21:41:54 +0200 Subject: [PATCH 072/103] [francetv] Add an extractor for Generation Quoi (closes #1475) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/francetv.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 65aacebb3..d1b7e5f99 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -43,6 +43,7 @@ from .francetv import ( PluzzIE, FranceTvInfoIE, France2IE, + GenerationQuoiIE ) from .freesound import FreesoundIE from .funnyordie import FunnyOrDieIE diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 5e915bc03..b1530e549 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -1,6 +1,7 @@ # encoding: utf-8 import re import xml.etree.ElementTree +import json from .common import InfoExtractor from ..utils import ( @@ -87,3 +88,30 @@ class France2IE(FranceTVBaseInfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') return self._extract_video(video_id) + + +class GenerationQuoiIE(InfoExtractor): + IE_NAME = u'http://generation-quoi.france2.fr' + _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P.*)(\?|$)' + + _TEST = { + u'url': u'http://generation-quoi.france2.fr/portrait/garde-a-vous', + u'file': u'k7FJX8VBcvvLmX4wA5Q.mp4', + u'info_dict': { + u'title': u'Génération Quoi - Garde à Vous', + u'uploader': u'Génération Quoi', + }, + u'params': { + # It uses Dailymotion + u'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name) + info_json = self._download_webpage(info_url, name) + info = json.loads(info_json) + return self.url_result('http://www.dailymotion.com/video/%s' % info['id'], + ie='Dailymotion') From 6f56389b8836301fc64f849e43ebd05043c0a66d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 24 Sep 2013 21:02:00 +0200 Subject: [PATCH 073/103] [youtube] update algos for length 86 and 84 (fixes #1494) --- devscripts/youtube_genalgo.py | 8 ++++---- youtube_dl/extractor/youtube.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index f91e8855d..3b90a2fed 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -27,15 +27,15 @@ tests = [ # 87 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"), - # 86 - vfluy6kdb 2013/09/06 + # 86 - vflHql6Pr 2013/09/24 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", - "yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"), + ";}|[{=+-d)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYT_EWQ0987654321mnbvcxzas/fghjklpoiuytrewq"), # 85 - vflkuzxcs 2013/09/11 ('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[', '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@'), - # 84 - vflg0g8PQ 2013/08/29 (sporadic) + # 84 - vflHql6Pr 2013/09/24 (sporadic) ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", - ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"), + "}[{=+-_)g*&^%$#@!MNBVCXZASDFGHJKLPOIUYTRE(Q0987654321mnbvcxzasdf?hjklpoiuytrewq"), # 83 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"), diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 47d5cb7ff..ec1cf8d30 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -431,11 +431,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): elif len(s) == 87: return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] elif len(s) == 86: - return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53] + return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[:16][::-1] elif len(s) == 85: return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84] elif len(s) == 84: - return s[81:36:-1] + s[0] + s[35:2:-1] + return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1] elif len(s) == 83: return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0] elif len(s) == 82: From bb0eee71e7b7519321694f3d68875bbd71affeb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 24 Sep 2013 21:04:13 +0200 Subject: [PATCH 074/103] [youtube] Update one of the test's description --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ec1cf8d30..606ed21c9 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -352,7 +352,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): u"info_dict": { u"upload_date": u"20120506", u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", - u"description": u"md5:3e2666e0a55044490499ea45fe9037b7", + u"description": u"md5:bdac09887d209a4ed54b8f76b2bdaa8b", u"uploader": u"Icona Pop", u"uploader_id": u"IconaPop" } From c3c88a2664595fd62898e44f8fc93c84e6d3c5a4 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 24 Sep 2013 21:04:43 +0200 Subject: [PATCH 075/103] Allow opts.cachedir == None to disable cache --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/__init__.py | 2 +- youtube_dl/extractor/youtube.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index ead1ccb1c..a3a351ee6 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -82,7 +82,7 @@ class YoutubeDL(object): daterange: A DateRange object, download only if the upload_date is in the range. skip_download: Skip the actual download of the video file cachedir: Location of the cache files in the filesystem. - "NONE" to disable filesystem cache. + None to disable filesystem cache. The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index a4769a8ae..ebf4a300f 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -618,7 +618,7 @@ def _real_main(argv=None): 'min_filesize': opts.min_filesize, 'max_filesize': opts.max_filesize, 'daterange': date, - 'cachedir': opts.cachedir, + 'cachedir': opts.cachedir if opts.cachedir != 'NONE' else None, 'youtube_print_sig_code': opts.youtube_print_sig_code, }) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 049da2f91..a6eefdf4e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -438,7 +438,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): cache_dir = self._downloader.params.get('cachedir', u'~/.youtube-dl/cache') - cache_enabled = cache_dir != u'NONE' + cache_enabled = cache_dir is not None if cache_enabled: cache_fn = os.path.join(os.path.expanduser(cache_dir), u'youtube-sigfuncs', From e35e4ddc9a4605a63a06c5bb12055bfceacb50b8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 24 Sep 2013 21:18:03 +0200 Subject: [PATCH 076/103] Fix output of --youtube-print-sig-code when counting down to 0 --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a6eefdf4e..148b20160 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -488,8 +488,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def gen_sig_code(idxs): def _genslice(start, end, step): starts = u'' if start == 0 else str(start) - ends = u':%d' % (end+step) - steps = u'' if step == 1 else (':%d' % step) + ends = (u':%d' % (end+step)) if end + step >= 0 else u':' + steps = u'' if step == 1 else (u':%d' % step) return u's[%s%s%s]' % (starts, ends, steps) step = None From f2c327fd39d10115573d709f94f20721a80895fb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 24 Sep 2013 21:20:42 +0200 Subject: [PATCH 077/103] Fix 86 signature (#1494) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 148b20160..e883a2c54 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1092,7 +1092,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): elif len(s) == 87: return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] elif len(s) == 86: - return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53] + return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1] elif len(s) == 85: return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84] elif len(s) == 84: From 7f747732547fedc876bcdcc77ba53a56324d7e87 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 24 Sep 2013 21:26:10 +0200 Subject: [PATCH 078/103] Add option --no-cache-dir --- youtube_dl/__init__.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index ebf4a300f..46d0fbd64 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -167,7 +167,12 @@ def parseOpts(overrideArguments=None): help='Output descriptions of all supported extractors', default=False) general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') - general.add_option('--cache-dir', dest='cachedir', default=u'~/.youtube-dl/cache', help='Location in the filesystem where youtube-dl can store downloaded information permanently. NONE to disable filesystem caching, %default by default') + general.add_option( + '--cache-dir', dest='cachedir', default=u'~/.youtube-dl/cache', + help='Location in the filesystem where youtube-dl can store downloaded information permanently. %default by default') + general.add_option( + '--no-cache-dir', action='store_const', const=None, dest='cachedir', + help='Disable filesystem caching') selection.add_option('--playlist-start', @@ -560,7 +565,7 @@ def _real_main(argv=None): parser.error(u'Cannot download a video and extract audio into the same' u' file! Use "%%(ext)s" instead of %r' % determine_ext(outtmpl, u'')) - + raise ValueError(repr(opts.cachedir)) # YoutubeDL ydl = YoutubeDL({ 'usenetrc': opts.usenetrc, @@ -618,7 +623,7 @@ def _real_main(argv=None): 'min_filesize': opts.min_filesize, 'max_filesize': opts.max_filesize, 'daterange': date, - 'cachedir': opts.cachedir if opts.cachedir != 'NONE' else None, + 'cachedir': opts.cachedir, 'youtube_print_sig_code': opts.youtube_print_sig_code, }) From 2cdeb20135d31ec568f016108d15735bfca33c10 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 24 Sep 2013 21:28:06 +0200 Subject: [PATCH 079/103] release 2013.09.24 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 88d70b47a..e33421216 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.09.20.1' +__version__ = '2013.09.24' From e80d8610645232583b5aec93fcd446fa67152d0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 24 Sep 2013 21:38:37 +0200 Subject: [PATCH 080/103] Revert "[southparkstudios] Fix mgid extraction" This reverts commit 0fd49457f5257dbe317c69314ee57a6c485d41a3. It seems that the redesign was temporary. --- youtube_dl/extractor/southparkstudios.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py index 1a611d3bb..b1e96b679 100644 --- a/youtube_dl/extractor/southparkstudios.py +++ b/youtube_dl/extractor/southparkstudios.py @@ -14,7 +14,7 @@ class SouthParkStudiosIE(MTVIE): u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4', u'info_dict': { u'title': u'Bat Daded', - u'description': u'Randy finally gets the chance to fight Bat Dad and gets the boys disqualified from the season championships.', + u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.', }, } @@ -33,6 +33,6 @@ class SouthParkStudiosIE(MTVIE): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - mgid = self._search_regex(r'data-mgid="(mgid:.*?)"', + mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"', webpage, u'mgid') return self._get_videos_info(mgid) From 8b25323ae2a6e144bdb7e46f60960a83487a8fda Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 24 Sep 2013 21:40:47 +0200 Subject: [PATCH 081/103] release 2013.09.24.1 --- README.md | 4 ++++ youtube_dl/version.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f54945acc..fc8070c37 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,10 @@ which means you can modify it, redistribute it or use it however you like. --extractor-descriptions Output descriptions of all supported extractors --proxy URL Use the specified HTTP/HTTPS proxy --no-check-certificate Suppress HTTPS certificate validation. + --cache-dir None Location in the filesystem where youtube-dl can + store downloaded information permanently. + ~/.youtube-dl/cache by default + --no-cache-dir Disable filesystem caching ## Video Selection: --playlist-start NUMBER playlist video to start at (default is 1) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e33421216..cd39f658b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.09.24' +__version__ = '2013.09.24.1' From 29c7a63df864cb0982119cec35677dbe568909c9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 24 Sep 2013 21:55:25 +0200 Subject: [PATCH 082/103] Remove debugging code --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 46d0fbd64..3851fc0a6 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -565,7 +565,7 @@ def _real_main(argv=None): parser.error(u'Cannot download a video and extract audio into the same' u' file! Use "%%(ext)s" instead of %r' % determine_ext(outtmpl, u'')) - raise ValueError(repr(opts.cachedir)) + # YoutubeDL ydl = YoutubeDL({ 'usenetrc': opts.usenetrc, From b98d6a1e19dd8b7a5a45806aa21faad1f33c1515 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 24 Sep 2013 21:55:34 +0200 Subject: [PATCH 083/103] release 2013.09.24.2 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index cd39f658b..8e6356dab 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.09.24.1' +__version__ = '2013.09.24.2' From 592882aa9f889432b07ad487f1a4228c9ae12818 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 26 Sep 2013 13:53:57 +0200 Subject: [PATCH 084/103] [brightcove] Support videos that only provide flv versions (fixes #1504) Moved the test from generic.py to brightcove.py --- youtube_dl/extractor/brightcove.py | 62 +++++++++++++++++++++++------- youtube_dl/extractor/generic.py | 11 ------ 2 files changed, 49 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 71e3c7883..859baae75 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -1,3 +1,5 @@ +# encoding: utf-8 + import re import json import xml.etree.ElementTree @@ -7,15 +9,37 @@ from ..utils import ( compat_urllib_parse, find_xpath_attr, compat_urlparse, + + ExtractorError, ) class BrightcoveIE(InfoExtractor): _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P.*)' _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' - - # There is a test for Brigtcove in GenericIE, that way we test both the download - # and the detection of videos, and we don't have to find an URL that is always valid + + _TESTS = [ + { + u'url': u'http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/', + u'file': u'2371591881001.mp4', + u'md5': u'9e80619e0a94663f0bdc849b4566af19', + u'note': u'Test Brightcove downloads and detection in GenericIE', + u'info_dict': { + u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', + u'uploader': u'8TV', + u'description': u'md5:a950cc4285c43e44d763d036710cd9cd', + } + }, + { + u'url': u'http://medianetwork.oracle.com/video/player/1785452137001', + u'file': u'1785452137001.flv', + u'info_dict': { + u'title': u'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges', + u'description': u'John Rose speaks at the JVM Language Summit, August 1, 2012.', + u'uploader': u'Oracle', + }, + }, + ] @classmethod def _build_brighcove_url(cls, object_str): @@ -72,15 +96,27 @@ class BrightcoveIE(InfoExtractor): playlist_title=playlist_info['mediaCollectionDTO']['displayName']) def _extract_video_info(self, video_info): - renditions = video_info['renditions'] - renditions = sorted(renditions, key=lambda r: r['size']) - best_format = renditions[-1] + info = { + 'id': video_info['id'], + 'title': video_info['displayName'], + 'description': video_info.get('shortDescription'), + 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), + 'uploader': video_info.get('publisherName'), + } - return {'id': video_info['id'], - 'title': video_info['displayName'], - 'url': best_format['defaultURL'], + renditions = video_info.get('renditions') + if renditions: + renditions = sorted(renditions, key=lambda r: r['size']) + best_format = renditions[-1] + info.update({ + 'url': best_format['defaultURL'], 'ext': 'mp4', - 'description': video_info.get('shortDescription'), - 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), - 'uploader': video_info.get('publisherName'), - } + }) + elif video_info.get('FLVFullLengthURL') is not None: + info.update({ + 'url': video_info['FLVFullLengthURL'], + 'ext': 'flv', + }) + else: + raise ExtractorError(u'Unable to extract video url for %s' % info['id']) + return info diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index f92e61fea..764070635 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -29,17 +29,6 @@ class GenericIE(InfoExtractor): u"title": u"R\u00e9gis plante sa Jeep" } }, - { - u'url': u'http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/', - u'file': u'2371591881001.mp4', - u'md5': u'9e80619e0a94663f0bdc849b4566af19', - u'note': u'Test Brightcove downloads and detection in GenericIE', - u'info_dict': { - u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', - u'uploader': u'8TV', - u'description': u'md5:a950cc4285c43e44d763d036710cd9cd', - } - }, ] def report_download_webpage(self, video_id): From 4de1994b6ed61a2aaddeee6452959d645fe5954b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 26 Sep 2013 18:59:56 +0200 Subject: [PATCH 085/103] [brightcove] Use direct url for the tests The test_all_urls.py test failed because BrightcoveIE doesn't match them. --- youtube_dl/extractor/brightcove.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 859baae75..558b3d009 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -20,7 +20,8 @@ class BrightcoveIE(InfoExtractor): _TESTS = [ { - u'url': u'http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/', + # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/ + u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001', u'file': u'2371591881001.mp4', u'md5': u'9e80619e0a94663f0bdc849b4566af19', u'note': u'Test Brightcove downloads and detection in GenericIE', @@ -31,7 +32,8 @@ class BrightcoveIE(InfoExtractor): } }, { - u'url': u'http://medianetwork.oracle.com/video/player/1785452137001', + # From http://medianetwork.oracle.com/video/player/1785452137001 + u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001', u'file': u'1785452137001.flv', u'info_dict': { u'title': u'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges', From ce65fb6c76e4496a35cd597bbc735e0351d82853 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Fri, 27 Sep 2013 05:50:16 +0200 Subject: [PATCH 086/103] [RTLnowIE] Add support for http://rtlnitronow.de --- youtube_dl/extractor/rtlnow.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index 7bb236c2b..963e0cc8f 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -8,8 +8,8 @@ from ..utils import ( ) class RTLnowIE(InfoExtractor): - """Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW""" - _VALID_URL = r'(?:http://)?(?P(?Prtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' + """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW and VOX NOW""" + _VALID_URL = r'(?:http://)?(?P(?Prtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?rtlnitronow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' _TESTS = [{ u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', u'file': u'90419.flv', @@ -61,6 +61,19 @@ class RTLnowIE(InfoExtractor): u'params': { u'skip_download': True, }, + }, + { + u'url': u'http://www.rtlnitronow.de/recht-ordnung/fahrradpolizei-koeln-fischereiaufsicht-ruegen.php?film_id=124311&player=1&season=1', + u'file': u'124311.flv', + u'info_dict': { + u'upload_date': u'20130830', + u'title': u'Recht & Ordnung - Fahrradpolizei Köln & Fischereiaufsicht Rügen', + u'description': u'Fahrradpolizei Köln & Fischereiaufsicht Rügen', + u'thumbnail': u'http://autoimg.static-fra.de/nitronow/338273/1500x1500/image2.jpg' + }, + u'params': { + u'skip_download': True, + }, }] def _real_extract(self,url): From 63efc427cd4a2e0892e02e0519134d760b30814a Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Fri, 27 Sep 2013 06:00:37 +0200 Subject: [PATCH 087/103] [RTLnowIE] Clean video title The title of some videos has the following format: Series - Episode | Series online schauen bei ... NOW --- youtube_dl/extractor/rtlnow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index 7bb236c2b..3783aa538 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -79,7 +79,7 @@ class RTLnowIE(InfoExtractor): msg = clean_html(note_m.group(1)) raise ExtractorError(msg) - video_title = self._html_search_regex(r'(?P<title>[^<]+)', + video_title = self._html_search_regex(r'(?P<title>[^<]+?)( \| [^<]*)?', webpage, u'title') playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P[^\']+)\'', webpage, u'playerdata_url') From 920de7a27d11a8f162e108c5891de70db738693a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 27 Sep 2013 06:15:21 +0200 Subject: [PATCH 088/103] [youtube] Fix 83 signature (Closes #1511) --- youtube_dl/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6beda8f3b..89c41efe5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1067,6 +1067,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): self._downloader.report_warning( u'Warning: Falling back to static signature algorithm') + return self._static_decrypt_signature( s, video_id, player_url, age_gate) @@ -1098,7 +1099,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): elif len(s) == 84: return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1] elif len(s) == 83: - return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0] + return s[80:63:-1] + s[0] + s[62:0:-1] + s[63] elif len(s) == 82: return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54] elif len(s) == 81: From 74bab3f0a4b601a7618f279afbd352bbc51dc3ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 27 Sep 2013 08:08:43 +0200 Subject: [PATCH 089/103] Don't embed subtitles if the list is empty or the field is not set (fixes #1510) --- youtube_dl/PostProcessor.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index ae56d2082..3ee1d3c58 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -444,8 +444,11 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): if information['ext'] != u'mp4': self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files') return True, information - sub_langs = [key for key in information['subtitles']] + if not information.get('subtitles'): + self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed') + return True, information + sub_langs = [key for key in information['subtitles']] filename = information['filepath'] input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs] From 509f398292ff4b9dffecd3a85cd02b4922319b13 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 27 Sep 2013 13:08:45 +0200 Subject: [PATCH 090/103] Remove youtube_genalgo (#1515) With the automatic signature extraction, this script has become superfluous now --- devscripts/youtube_genalgo.py | 116 ---------------------------------- 1 file changed, 116 deletions(-) delete mode 100644 devscripts/youtube_genalgo.py diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py deleted file mode 100644 index 3b90a2fed..000000000 --- a/devscripts/youtube_genalgo.py +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 - -# Generate youtube signature algorithm from test cases - -import sys - -tests = [ - # 93 - vfl79wBKW 2013/07/20 - (u"qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"€", - u".>/?;:|}][{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ098765'321mnbvcxzasdfghjklpoiu"), - # 92 - vflQw-fB4 2013/07/17 - ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"", - "mrtyuioplkjhgfdsazxcvbnq1234567890QWERTY}IOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]\"|:;"), - # 91 - vfl79wBKW 2013/07/20 (sporadic) - ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~", - "/?;:|}][{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543.1mnbvcxzasdfghjklpoiu"), - # 90 - ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`", - "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"), - # 89 - ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'", - "/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"), - # 88 - vflapUV9V 2013/08/28 - ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<", - "ioplkjhgfdsazxcvbnm12<4567890QWERTYUIOZLKJHGFDSAeXCVBNM!@#$%^&*()_-+={[]}|:;?/>.3"), - # 87 - ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", - "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"), - # 86 - vflHql6Pr 2013/09/24 - ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", - ";}|[{=+-d)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYT_EWQ0987654321mnbvcxzas/fghjklpoiuytrewq"), - # 85 - vflkuzxcs 2013/09/11 - ('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[', - '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@'), - # 84 - vflHql6Pr 2013/09/24 (sporadic) - ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", - "}[{=+-_)g*&^%$#@!MNBVCXZASDFGHJKLPOIUYTRE(Q0987654321mnbvcxzasdf?hjklpoiuytrewq"), - # 83 - ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", - ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"), - # 82 - vflGNjMhJ 2013/09/12 - ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<", - ".>/?;}[<=+-(*&^%$#@!MNBVCXeASDFGHKLPOqUYTREWQ0987654321mnbvcxzasdfghjklpoiuytrIwZ"), - # 81 - vflLC8JvQ 2013/07/25 - ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.", - "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"), - # 80 - vflZK4ZYR 2013/08/23 (sporadic) - ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>", - "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>"), - # 79 - vflLC8JvQ 2013/07/25 (sporadic) - ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/", - "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"), -] - -tests_age_gate = [ - # 86 - vflqinMWD - ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", - "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"), -] - -def find_matching(wrong, right): - idxs = [wrong.index(c) for c in right] - return compress(idxs) - return ('s[%d]' % i for i in idxs) - -def compress(idxs): - def _genslice(start, end, step): - starts = '' if start == 0 else str(start) - ends = ':%d' % (end+step) - steps = '' if step == 1 else (':%d' % step) - return 's[%s%s%s]' % (starts, ends, steps) - - step = None - for i, prev in zip(idxs[1:], idxs[:-1]): - if step is not None: - if i - prev == step: - continue - yield _genslice(start, prev, step) - step = None - continue - if i - prev in [-1, 1]: - step = i - prev - start = prev - continue - else: - yield 's[%d]' % prev - if step is None: - yield 's[%d]' % i - else: - yield _genslice(start, i, step) - -def _assert_compress(inp, exp): - res = list(compress(inp)) - if res != exp: - print('Got %r, expected %r' % (res, exp)) - assert res == exp -_assert_compress([0,2,4,6], ['s[0]', 's[2]', 's[4]', 's[6]']) -_assert_compress([0,1,2,4,6,7], ['s[:3]', 's[4]', 's[6:8]']) -_assert_compress([8,0,1,2,4,7,6,9], ['s[8]', 's[:3]', 's[4]', 's[7:5:-1]', 's[9]']) - -def gen(wrong, right, indent): - code = ' + '.join(find_matching(wrong, right)) - return 'if len(s) == %d:\n%s return %s\n' % (len(wrong), indent, code) - -def genall(tests): - indent = ' ' * 8 - return indent + (indent + 'el').join(gen(wrong, right, indent) for wrong,right in tests) - -def main(): - print(genall(tests)) - print(u' Age gate:') - print(genall(tests_age_gate)) - -if __name__ == '__main__': - main() From 0a60edcfa975e4f791923574b3f888e3ffe72c43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 27 Sep 2013 14:19:19 +0200 Subject: [PATCH 091/103] Don't fail if the video thumbnail couldn't be downloaded (fixes #1516) Just report a warning --- youtube_dl/YoutubeDL.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a3a351ee6..44a272e7e 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -545,11 +545,15 @@ class YoutubeDL(object): thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format self.to_screen(u'[%s] %s: Downloading thumbnail ...' % (info_dict['extractor'], info_dict['id'])) - uf = compat_urllib_request.urlopen(info_dict['thumbnail']) - with open(thumb_filename, 'wb') as thumbf: - shutil.copyfileobj(uf, thumbf) - self.to_screen(u'[%s] %s: Writing thumbnail to: %s' % - (info_dict['extractor'], info_dict['id'], thumb_filename)) + try: + uf = compat_urllib_request.urlopen(info_dict['thumbnail']) + with open(thumb_filename, 'wb') as thumbf: + shutil.copyfileobj(uf, thumbf) + self.to_screen(u'[%s] %s: Writing thumbnail to: %s' % + (info_dict['extractor'], info_dict['id'], thumb_filename)) + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self.report_warning(u'Unable to download thumbnail "%s": %s' % + (info_dict['thumbnail'], compat_str(err))) if not self.params.get('skip_download', False): if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)): From 2dc592991aac5e0b3b91e3d2123490184033177e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 27 Sep 2013 14:20:52 +0200 Subject: [PATCH 092/103] [youtube] update description of test --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 89c41efe5..9aee2ebf2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -361,7 +361,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): u"info_dict": { u"upload_date": u"20120506", u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", - u"description": u"md5:bdac09887d209a4ed54b8f76b2bdaa8b", + u"description": u"md5:5b292926389560516e384ac437c0ec07", u"uploader": u"Icona Pop", u"uploader_id": u"IconaPop" } From f490e77e77c9db082e073f002088d021b16513ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 27 Sep 2013 14:22:36 +0200 Subject: [PATCH 093/103] [youtube] Set the thumbnail to None if it can't be extracted --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9aee2ebf2..618d87515 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1360,7 +1360,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): video_thumbnail = m_thumb.group(1) elif 'thumbnail_url' not in video_info: self._downloader.report_warning(u'unable to extract video thumbnail') - video_thumbnail = '' + video_thumbnail = None else: # don't panic if we can't find it video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0]) From 9abb32045a85e1ecc831c624494ad41af3997e20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 27 Sep 2013 15:06:27 +0200 Subject: [PATCH 094/103] [youtube] Add hlsvp to the error message if it can't be found and remove the live stream test It's no longer available, other olympics streams have the same problem. --- youtube_dl/extractor/youtube.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 618d87515..53f13b516 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -378,21 +378,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): u"uploader_id": u"justintimberlakeVEVO" } }, - { - u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE', - u'file': u'TGi3HqYrWHE.mp4', - u'note': u'm3u8 video', - u'info_dict': { - u'title': u'Triathlon - Men - London 2012 Olympic Games', - u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games', - u'uploader': u'olympic', - u'upload_date': u'20120807', - u'uploader_id': u'olympic', - }, - u'params': { - u'skip_download': True, - }, - }, ] @@ -1480,7 +1465,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): return else: - raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info') + raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') results = [] for format_param, video_real_url in video_url_list: From 0b7c2485b66d53ad14bc331e867927b370599e43 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 28 Sep 2013 15:43:34 +0200 Subject: [PATCH 095/103] [zdf] Add support for hash URLs and simplify (#1518) --- youtube_dl/extractor/zdf.py | 74 +++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 418509cb9..faed7ff7f 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -2,16 +2,14 @@ import re from .common import InfoExtractor from ..utils import ( + determine_ext, ExtractorError, - unescapeHTML, ) + class ZDFIE(InfoExtractor): - _VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek\/(.*beitrag\/video\/)(?P[^/\?]+)(?:\?.*)?' - _TITLE = r'(?P.*)</h1>' + _VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek(?P<hash>#)?\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?' _MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>' - _MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"' - _RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -19,6 +17,9 @@ class ZDFIE(InfoExtractor): raise ExtractorError(u'Invalid URL: %s' % url) video_id = mobj.group('video_id') + if mobj.group('hash'): + url = url.replace(u'#', u'', 1) + html = self._download_webpage(url, video_id) streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)] if streams is None: @@ -27,39 +28,48 @@ class ZDFIE(InfoExtractor): # s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url # s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url # choose first/default media type and highest quality for now - for s in streams: #find 300 - dsl1000mbit - if s['quality'] == '300' and s['media_type'] == 'wstreaming': - stream_=s - break - for s in streams: #find veryhigh - dsl2000mbit - if s['quality'] == 'veryhigh' and s['media_type'] == 'wstreaming': # 'hstreaming' - rtsp is not working - stream_=s - break - if stream_ is None: + def stream_pref(s): + TYPE_ORDER = ['ostreaming', 'hstreaming', 'wstreaming'] + try: + type_pref = TYPE_ORDER.index(s['media_type']) + except ValueError: + type_pref = 999 + + QUALITY_ORDER = ['veryhigh', '300'] + try: + quality_pref = QUALITY_ORDER.index(s['quality']) + except ValueError: + quality_pref = 999 + + return (type_pref, quality_pref) + + sorted_streams = sorted(streams, key=stream_pref) + if not sorted_streams: raise ExtractorError(u'No stream found.') + stream = sorted_streams[0] - media_link = self._download_webpage(stream_['video_url'], video_id,'Get stream URL') + media_link = self._download_webpage( + stream['video_url'], + video_id, + u'Get stream URL') - self.report_extraction(video_id) - mobj = re.search(self._TITLE, html) - if mobj is None: - raise ExtractorError(u'Cannot extract title') - title = unescapeHTML(mobj.group('title')) + MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"' + RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)' - mobj = re.search(self._MMS_STREAM, media_link) + mobj = re.search(self._MEDIA_STREAM, media_link) if mobj is None: - mobj = re.search(self._RTSP_STREAM, media_link) + mobj = re.search(RTSP_STREAM, media_link) if mobj is None: raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL') - mms_url = mobj.group('video_url') + video_url = mobj.group('video_url') - mobj = re.search('(.*)[.](?P<ext>[^.]+)', mms_url) - if mobj is None: - raise ExtractorError(u'Cannot extract extention') - ext = mobj.group('ext') + title = self._html_search_regex( + r'<h1(?: class="beitragHeadline")?>(.*?)</h1>', + html, u'title') - return [{'id': video_id, - 'url': mms_url, - 'title': title, - 'ext': ext - }] + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'ext': determine_ext(video_url) + } From 9c15e9de849641143e7654f2656c68e066fe9e2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 28 Sep 2013 21:19:52 +0200 Subject: [PATCH 096/103] [yahoo] Fix video extraction (fixes #1521) There's no need to use two different methods. Now we can also download videos over http if possible. Also run the test for rtmp videos, but skip the download. --- youtube_dl/extractor/yahoo.py | 132 +++++++++++++++++----------------- 1 file changed, 65 insertions(+), 67 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 32d5b9477..39126e631 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -1,4 +1,3 @@ -import datetime import itertools import json import re @@ -6,86 +5,85 @@ import re from .common import InfoExtractor, SearchInfoExtractor from ..utils import ( compat_urllib_parse, - - ExtractorError, + compat_urlparse, + determine_ext, + clean_html, ) + class YahooIE(InfoExtractor): IE_DESC = u'Yahoo screen' _VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html' - _TEST = { - u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', - u'file': u'214727115.flv', - u'md5': u'2e717f169c1be93d84d3794a00d4a325', - u'info_dict': { - u"title": u"Julian Smith & Travis Legg Watch Julian Smith" + _TESTS = [ + { + u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', + u'file': u'214727115.mp4', + u'info_dict': { + u'title': u'Julian Smith & Travis Legg Watch Julian Smith', + u'description': u'Julian and Travis watch Julian Smith', + }, }, - u'skip': u'Requires rtmpdump' - } + { + u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html', + u'file': u'103000935.flv', + u'info_dict': { + u'title': u'The Cougar Lies with Spanish Moss', + u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?', + }, + u'params': { + # Requires rtmpdump + u'skip_download': True, + }, + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - m_id = re.search(r'YUI\.namespace\("Media"\)\.CONTENT_ID = "(?P<new_id>.+?)";', webpage) - if m_id is None: - # TODO: Check which url parameters are required - info_url = 'http://cosmos.bcst.yahoo.com/rest/v2/pops;lmsoverride=1;outputformat=mrss;cb=974419660;id=%s;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id - webpage = self._download_webpage(info_url, video_id, u'Downloading info webpage') - info_re = r'''<title><!\[CDATA\[(?P<title>.*?)\]\]>.* - .*?)\]\]>.* - .*?)\ .*\]\]>.* - Date: Sun, 29 Sep 2013 12:44:02 +0200 Subject: [PATCH 097/103] [dailymotion] Disable the family filter in the playlists (fixes #1524) --- youtube_dl/extractor/dailymotion.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 64b89aae8..3f012aedc 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -14,8 +14,15 @@ from ..utils import ( ExtractorError, ) +class DailymotionBaseInfoExtractor(InfoExtractor): + @staticmethod + def _build_request(url): + """Build a request with the family filter disabled""" + request = compat_urllib_request.Request(url) + request.add_header('Cookie', 'family_filter=off') + return request -class DailymotionIE(SubtitlesInfoExtractor): +class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): """Information Extractor for Dailymotion""" _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)' @@ -40,8 +47,7 @@ class DailymotionIE(SubtitlesInfoExtractor): url = 'http://www.dailymotion.com/video/%s' % video_id # Retrieve video webpage to extract further information - request = compat_urllib_request.Request(url) - request.add_header('Cookie', 'family_filter=off') + request = self._build_request(url) webpage = self._download_webpage(request, video_id) # Extract URL, uploader and title from webpage @@ -113,7 +119,7 @@ class DailymotionIE(SubtitlesInfoExtractor): return {} -class DailymotionPlaylistIE(InfoExtractor): +class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): IE_NAME = u'dailymotion:playlist' _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P.+?)/' _MORE_PAGES_INDICATOR = r'
' @@ -122,7 +128,8 @@ class DailymotionPlaylistIE(InfoExtractor): def _extract_entries(self, id): video_ids = [] for pagenum in itertools.count(1): - webpage = self._download_webpage(self._PAGE_TEMPLATE % (id, pagenum), + request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum)) + webpage = self._download_webpage(request, id, u'Downloading page %s' % pagenum) playlist_el = get_element_by_attribute(u'class', u'video_list', webpage) From 46353f6783b9e468c9271c864f0711c85d3cea33 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 29 Sep 2013 11:17:38 +0200 Subject: [PATCH 098/103] [update] Look for .exe extension on Windows (Fixes #745) --- youtube_dl/__init__.py | 2 +- youtube_dl/update.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 3851fc0a6..28a7bdd92 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -658,7 +658,7 @@ def _real_main(argv=None): # Update version if opts.update_self: - update_self(ydl.to_screen, opts.verbose, sys.argv[0]) + update_self(ydl.to_screen, opts.verbose) # Maybe do nothing if len(all_urls) < 1: diff --git a/youtube_dl/update.py b/youtube_dl/update.py index ccab6f27f..669b59a68 100644 --- a/youtube_dl/update.py +++ b/youtube_dl/update.py @@ -1,6 +1,7 @@ import json import traceback import hashlib +import sys from zipimport import zipimporter from .utils import * @@ -34,7 +35,7 @@ def rsa_verify(message, signature, key): if signature != sha256(message).digest(): return False return True -def update_self(to_screen, verbose, filename): +def update_self(to_screen, verbose): """Update the program file with the latest version from the repository""" UPDATE_URL = "http://rg3.github.io/youtube-dl/update/" @@ -42,7 +43,6 @@ def update_self(to_screen, verbose, filename): JSON_URL = UPDATE_URL + 'versions.json' UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537) - if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, "frozen"): to_screen(u'It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.') return @@ -80,6 +80,12 @@ def update_self(to_screen, verbose, filename): print_notes(to_screen, versions_info['versions']) + filename = sys.argv[0] + # Py2EXE: Filename could be different + if hasattr(sys, "frozen") and not os.path.isfile(filename): + if os.path.isfile(filename + u'.exe'): + filename += u'.exe' + if not os.access(filename, os.W_OK): to_screen(u'ERROR: no write permissions on %s' % filename) return From d27903703673e565a3a1e8dd418d1347ef331b3e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 29 Sep 2013 11:26:01 +0200 Subject: [PATCH 099/103] [update] Prevent cmd window popup on Windows (Fixes #1478) --- youtube_dl/update.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/youtube_dl/update.py b/youtube_dl/update.py index 669b59a68..0689a4891 100644 --- a/youtube_dl/update.py +++ b/youtube_dl/update.py @@ -1,6 +1,8 @@ +import io import json import traceback import hashlib +import subprocess import sys from zipimport import zipimporter @@ -75,8 +77,9 @@ def update_self(to_screen, verbose): to_screen(u'ERROR: the versions file signature is invalid. Aborting.') return - to_screen(u'Updating to version ' + versions_info['latest'] + '...') - version = versions_info['versions'][versions_info['latest']] + version_id = versions_info['latest'] + to_screen(u'Updating to version ' + version_id + '...') + version = versions_info['versions'][version_id] print_notes(to_screen, versions_info['versions']) @@ -122,16 +125,18 @@ def update_self(to_screen, verbose): try: bat = os.path.join(directory, 'youtube-dl-updater.bat') - b = open(bat, 'w') - b.write(""" -echo Updating youtube-dl... + with io.open(bat, 'w') as batfile: + batfile.write(u""" +@echo off +echo Waiting for file handle to be closed ... ping 127.0.0.1 -n 5 -w 1000 > NUL -move /Y "%s.new" "%s" -del "%s" - \n""" %(exe, exe, bat)) - b.close() +move /Y "%s.new" "%s" > NUL +echo Updated youtube-dl to version %s. +start /b "" cmd /c del "%%~f0"&exit /b" + \n""" % (exe, exe, version_id)) - os.startfile(bat) + subprocess.Popen([bat]) # Continues to run in the background + return # Do not show premature success messages except (IOError, OSError) as err: if verbose: to_screen(compat_str(traceback.format_exc())) to_screen(u'ERROR: unable to overwrite current version') From 138a5454b5f2af27b0b31764a8125cad23fd3429 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 29 Sep 2013 14:38:37 +0200 Subject: [PATCH 100/103] release 2013.09.29 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8e6356dab..e3e5d5538 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.09.24.2' +__version__ = '2013.09.29' From 843530568f326294d714b5b9f11bbf6176d73ccf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 29 Sep 2013 20:49:58 +0200 Subject: [PATCH 101/103] [appletrailers] Rework extraction (fixes #1387) The exraction was broken: * The includes page contains img elements that need to be fixed. * Use the 'itunes.inc' page, it contains a json dictionary for each trailer with information. * Get the formats from 'includes/settings{trailer_name}.json' * Use urljoin to allow urls with a fragment identifier to work Removed the thumbnail urls from the tests, they are different now. --- youtube_dl/extractor/appletrailers.py | 112 ++++++++++---------------- 1 file changed, 42 insertions(+), 70 deletions(-) diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 8b191c196..b86c4b909 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -1,8 +1,10 @@ import re import xml.etree.ElementTree +import json from .common import InfoExtractor from ..utils import ( + compat_urlparse, determine_ext, ) @@ -14,10 +16,9 @@ class AppleTrailersIE(InfoExtractor): u"playlist": [ { u"file": u"manofsteel-trailer4.mov", - u"md5": u"11874af099d480cc09e103b189805d5f", + u"md5": u"d97a8e575432dbcb81b7c3acb741f8a8", u"info_dict": { u"duration": 111, - u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg", u"title": u"Trailer 4", u"upload_date": u"20130523", u"uploader_id": u"wb", @@ -25,10 +26,9 @@ class AppleTrailersIE(InfoExtractor): }, { u"file": u"manofsteel-trailer3.mov", - u"md5": u"07a0a262aae5afe68120eed61137ab34", + u"md5": u"b8017b7131b721fb4e8d6f49e1df908c", u"info_dict": { u"duration": 182, - u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg", u"title": u"Trailer 3", u"upload_date": u"20130417", u"uploader_id": u"wb", @@ -36,10 +36,9 @@ class AppleTrailersIE(InfoExtractor): }, { u"file": u"manofsteel-trailer.mov", - u"md5": u"e401fde0813008e3307e54b6f384cff1", + u"md5": u"d0f1e1150989b9924679b441f3404d48", u"info_dict": { u"duration": 148, - u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg", u"title": u"Trailer", u"upload_date": u"20121212", u"uploader_id": u"wb", @@ -47,10 +46,9 @@ class AppleTrailersIE(InfoExtractor): }, { u"file": u"manofsteel-teaser.mov", - u"md5": u"76b392f2ae9e7c98b22913c10a639c97", + u"md5": u"5fe08795b943eb2e757fa95cb6def1cb", u"info_dict": { u"duration": 93, - u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg", u"title": u"Teaser", u"upload_date": u"20120721", u"uploader_id": u"wb", @@ -59,87 +57,61 @@ class AppleTrailersIE(InfoExtractor): ] } + _JSON_RE = r'iTunes.playURL\((.*?)\);' + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) movie = mobj.group('movie') uploader_id = mobj.group('company') - playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc' + playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc') playlist_snippet = self._download_webpage(playlist_url, movie) - playlist_cleaned = re.sub(r'(?s)', u'', playlist_snippet) + playlist_cleaned = re.sub(r'(?s).*?', u'', playlist_snippet) + playlist_cleaned = re.sub(r'', r'', playlist_cleaned) + # The ' in the onClick attributes are not escaped, it couldn't be parsed + # with xml.etree.ElementTree.fromstring + # like: http://trailers.apple.com/trailers/wb/gravity/ + def _clean_json(m): + return u'iTunes.playURL(%s);' % m.group(1).replace('\'', ''') + playlist_cleaned = re.sub(self._JSON_RE, _clean_json, playlist_cleaned) playlist_html = u'' + playlist_cleaned + u'' - size_cache = {} - doc = xml.etree.ElementTree.fromstring(playlist_html) playlist = [] for li in doc.findall('./div/ul/li'): - title = li.find('.//h3').text + on_click = li.find('.//a').attrib['onClick'] + trailer_info_json = self._search_regex(self._JSON_RE, + on_click, u'trailer info') + trailer_info = json.loads(trailer_info_json) + title = trailer_info['title'] video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower() thumbnail = li.find('.//img').attrib['src'] + upload_date = trailer_info['posted'].replace('-', '') - date_el = li.find('.//p') - upload_date = None - m = re.search(r':\s?(?P[0-9]{2})/(?P[0-9]{2})/(?P[0-9]{2})', date_el.text) - if m: - upload_date = u'20' + m.group('year') + m.group('month') + m.group('day') - runtime_el = date_el.find('./br') - m = re.search(r':\s?(?P[0-9]+):(?P[0-9]{1,2})', runtime_el.tail) + runtime = trailer_info['runtime'] + m = re.search(r'(?P[0-9]+):(?P[0-9]{1,2})', runtime) duration = None if m: duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) - formats = [] - for formats_el in li.findall('.//a'): - if formats_el.attrib['class'] != 'OverlayPanel': - continue - target = formats_el.attrib['target'] - - format_code = formats_el.text - if 'Automatic' in format_code: - continue + first_url = trailer_info['url'] + trailer_id = first_url.split('/')[-1].rpartition('_')[0] + settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id) + settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json') + settings = json.loads(settings_json) - size_q = formats_el.attrib['href'] - size_id = size_q.rpartition('#videos-')[2] - if size_id not in size_cache: - size_url = url + size_q - sizepage_html = self._download_webpage( - size_url, movie, - note=u'Downloading size info %s' % size_id, - errnote=u'Error while downloading size info %s' % size_id, - ) - _doc = xml.etree.ElementTree.fromstring(sizepage_html) - size_cache[size_id] = _doc - - sizepage_doc = size_cache[size_id] - links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a') - for vid_a in links: - href = vid_a.get('href') - if not href.endswith(target): - continue - detail_q = href.partition('#')[0] - detail_url = url + '/' + detail_q - - m = re.match(r'includes/(?P[^/]+)/', detail_q) - detail_id = m.group('detail_id') - - detail_html = self._download_webpage( - detail_url, movie, - note=u'Downloading detail %s %s' % (detail_id, size_id), - errnote=u'Error while downloading detail %s %s' % (detail_id, size_id) - ) - detail_doc = xml.etree.ElementTree.fromstring(detail_html) - movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a') - assert movie_link_el.get('class') == 'movieLink' - movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h') - ext = determine_ext(movie_link) - assert ext == 'mov' - - formats.append({ - 'format': format_code, - 'ext': ext, - 'url': movie_link, - }) + formats = [] + for format in settings['metadata']['sizes']: + # The src is a file pointing to the real video file + format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src']) + formats.append({ + 'url': format_url, + 'ext': determine_ext(format_url), + 'format': format['type'], + 'width': format['width'], + 'height': int(format['height']), + }) + formats = sorted(formats, key=lambda f: (f['height'], f['width'])) info = { '_type': 'video', From bb4aa62cf7ad3d5aae4edf56ab8954c80a2d8956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 29 Sep 2013 20:59:19 +0200 Subject: [PATCH 102/103] [appletrailers] The request for the settings must have the trailer name in lower case (fixes #1329) --- youtube_dl/extractor/appletrailers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index b86c4b909..6d6237f8a 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -95,7 +95,7 @@ class AppleTrailersIE(InfoExtractor): duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) first_url = trailer_info['url'] - trailer_id = first_url.split('/')[-1].rpartition('_')[0] + trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower() settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id) settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json') settings = json.loads(settings_json) From 722076a123c60ed6d5a978c4bc2609f46c8e3ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 29 Sep 2013 23:07:26 +0200 Subject: [PATCH 103/103] [rtlnow] Replace one of the tests The video is no longer available. --- youtube_dl/extractor/rtlnow.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index e6fa0475e..32541077f 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -63,13 +63,13 @@ class RTLnowIE(InfoExtractor): }, }, { - u'url': u'http://www.rtlnitronow.de/recht-ordnung/fahrradpolizei-koeln-fischereiaufsicht-ruegen.php?film_id=124311&player=1&season=1', - u'file': u'124311.flv', + u'url': u'http://www.rtlnitronow.de/recht-ordnung/lebensmittelkontrolle-erlangenordnungsamt-berlin.php?film_id=127367&player=1&season=1', + u'file': u'127367.flv', u'info_dict': { - u'upload_date': u'20130830', - u'title': u'Recht & Ordnung - Fahrradpolizei Köln & Fischereiaufsicht Rügen', - u'description': u'Fahrradpolizei Köln & Fischereiaufsicht Rügen', - u'thumbnail': u'http://autoimg.static-fra.de/nitronow/338273/1500x1500/image2.jpg' + u'upload_date': u'20130926', + u'title': u'Recht & Ordnung - Lebensmittelkontrolle Erlangen/Ordnungsamt...', + u'description': u'Lebensmittelkontrolle Erlangen/Ordnungsamt Berlin', + u'thumbnail': u'http://autoimg.static-fra.de/nitronow/344787/1500x1500/image2.jpg', }, u'params': { u'skip_download': True,