From 5552c9eb0fece567f7dda13810939fca32d7d65a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 26 May 2017 21:58:18 +0800 Subject: [PATCH] [utils] Recognize more patterns in strip_jsonp() Used in Youku Show pages --- ChangeLog | 1 + test/test_utils.py | 8 ++++++++ youtube_dl/utils.py | 7 ++++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 6a05657ab..d6e980c5a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Core ++ [utils] strip_jsonp() can recognize more patterns * [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182) Extractors diff --git a/test/test_utils.py b/test/test_utils.py index f31559e71..d7e05817c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -678,6 +678,14 @@ class TestUtil(unittest.TestCase): d = json.loads(stripped) self.assertEqual(d, {'status': 'success'}) + stripped = strip_jsonp('window.cb && window.cb({"status": "success"});') + d = json.loads(stripped) + self.assertEqual(d, {'status': 'success'}) + + stripped = strip_jsonp('window.cb && cb({"status": "success"});') + d = json.loads(stripped) + self.assertEqual(d, {'status': 'success'}) + def test_uppercase_escape(self): self.assertEqual(uppercase_escape('aä'), 'aä') self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 4293a77f5..6c84bfe0f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2211,7 +2211,12 @@ def parse_age_limit(s): def strip_jsonp(code): return re.sub( - r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code) + r'''(?sx)^ + (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+) + (?:\s*&&\s*(?P=func_name))? + \s*\(\s*(?P<callback_data>.*)\);? + \s*?(?://[^\n]*)*$''', + r'\g<callback_data>', code) def js_to_json(code):