From 4eda10499e8db831167062b0e0dbc7d10d34c1f9 Mon Sep 17 00:00:00 2001 From: Kevin O'Connor Date: Sat, 17 Oct 2020 13:10:41 -0400 Subject: [PATCH 1/3] [utils] Don't attempt to coerce JS strings to numbers in js_to_json (#26851) The current logic in `js_to_json` tries to rewrite octal/hex numbers to decimal. However, when the logic actually happens the `"` or `'` have already been trimmed off. This causes what were originally strings, that happen to look like octal/hex numbers, to get rewritten to decimal and returned as a number rather than a string. In practice, something like: ```js { "0x40": "foo", "040": "bar", } ``` would get rewritten as: ```json { 64: "foo", 32: "bar" } ``` This is problematic since this isn't valid JSON as you cannot have non-string keys. --- test/test_utils.py | 6 ++++++ youtube_dl/utils.py | 12 ++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 962fd8d75..c2d1e4fb1 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -994,6 +994,12 @@ class TestUtil(unittest.TestCase): on = js_to_json('{42:4.2e1}') self.assertEqual(json.loads(on), {'42': 42.0}) + on = js_to_json('{ "0x40": "0x40" }') + self.assertEqual(json.loads(on), {'0x40': '0x40'}) + + on = js_to_json('{ "040": "040" }') + self.assertEqual(json.loads(on), {'040': '040'}) + def test_js_to_json_malformed(self): self.assertEqual(js_to_json('42a1'), '42"a1"') self.assertEqual(js_to_json('42a-1'), '42"a"-1') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 01d9c0362..737e2810e 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -4088,12 +4088,12 @@ def js_to_json(code): '\\\n': '', '\\x': '\\u00', }.get(m.group(0), m.group(0)), v[1:-1]) - - for regex, base in INTEGER_TABLE: - im = re.match(regex, v) - if im: - i = int(im.group(1), base) - return '"%d":' % i if v.endswith(':') else '%d' % i + else: + for regex, base in INTEGER_TABLE: + im = re.match(regex, v) + if im: + i = int(im.group(1), 
base) + return '"%d":' % i if v.endswith(':') else '%d' % i return '"%s"' % v From c20b0e4fa7b73add166ed8b3ec371774e770447a Mon Sep 17 00:00:00 2001 From: ilpersi Date: Sun, 18 Oct 2020 10:53:54 +0200 Subject: [PATCH 2/3] Fix teachable course download --- youtube_dl/extractor/teachable.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/teachable.py b/youtube_dl/extractor/teachable.py index a75369dbe..5a2df1ce6 100644 --- a/youtube_dl/extractor/teachable.py +++ b/youtube_dl/extractor/teachable.py @@ -139,9 +139,9 @@ class TeachableIE(TeachableBaseIE): @staticmethod def _is_teachable(webpage): - return 'teachableTracker.linker:autoLink' in webpage and re.search( - r']+href=["\']https?://process\.fs\.teachablecdn\.com', - webpage) + return 'teachableTracker.linker:autoLink' in webpage and ( + re.search(r']+href=["\']https?://process\.fs\.teachablecdn\.com', webpage) or + re.search(r']+src=["\']https?://process\.fs\.teachablecdn\.com', webpage)) @staticmethod def _extract_url(webpage, source_url): From 5c0e66320de8377df522aa6c04fecf028c1e00b7 Mon Sep 17 00:00:00 2001 From: ilpersi Date: Sun, 18 Oct 2020 19:30:14 +0200 Subject: [PATCH 3/3] Fix flake8 --- youtube_dl/extractor/teachable.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/teachable.py b/youtube_dl/extractor/teachable.py index 5a2df1ce6..375b37ca5 100644 --- a/youtube_dl/extractor/teachable.py +++ b/youtube_dl/extractor/teachable.py @@ -140,8 +140,12 @@ class TeachableIE(TeachableBaseIE): @staticmethod def _is_teachable(webpage): return 'teachableTracker.linker:autoLink' in webpage and ( - re.search(r']+href=["\']https?://process\.fs\.teachablecdn\.com', webpage) or - re.search(r']+src=["\']https?://process\.fs\.teachablecdn\.com', webpage)) + re.search( + r']+href=["\']https?://process\.fs\.teachablecdn\.com', + webpage) + or re.search( + r']+src=["\']https?://process\.fs\.teachablecdn\.com', + webpage)) 
@staticmethod def _extract_url(webpage, source_url):