From 70852b47ca101f0b4acc76eb3213b763a14b3602 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 20 Aug 2016 00:12:32 +0800 Subject: [PATCH] [utils] Recognize units with full names in parse_filesize Reference: https://en.wikipedia.org/wiki/Template:Quantities_of_bytes --- ChangeLog | 4 +++- test/test_utils.py | 1 + youtube_dl/utils.py | 17 +++++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 450351231..b36e4438c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,9 @@ version Core -* Support m3u8 manifests in HTML5 multimedia tags ++ Recognize file size strings with full unit names (for example "8.5 + megabytes") ++ Support m3u8 manifests in HTML5 multimedia tags * Fix js_to_json(): correct octal or hexadecimal number detection Extractors diff --git a/test/test_utils.py b/test/test_utils.py index b83da93b4..d16ea7f77 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -823,6 +823,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_filesize('1.2tb'), 1200000000000) self.assertEqual(parse_filesize('1,24 KB'), 1240) self.assertEqual(parse_filesize('1,24 kb'), 1240) + self.assertEqual(parse_filesize('8.5 megabytes'), 8500000) def test_parse_count(self): self.assertEqual(parse_count(None), None) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 0c36c1b80..41ca562f1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1504,46 +1504,63 @@ def parse_filesize(s): _UNIT_TABLE = { 'B': 1, 'b': 1, + 'bytes': 1, 'KiB': 1024, 'KB': 1000, 'kB': 1024, 'Kb': 1000, 'kb': 1000, + 'kilobytes': 1000, + 'kibibytes': 1024, 'MiB': 1024 ** 2, 'MB': 1000 ** 2, 'mB': 1024 ** 2, 'Mb': 1000 ** 2, 'mb': 1000 ** 2, + 'megabytes': 1000 ** 2, + 'mebibytes': 1024 ** 2, 'GiB': 1024 ** 3, 'GB': 1000 ** 3, 'gB': 1024 ** 3, 'Gb': 1000 ** 3, 'gb': 1000 ** 3, + 'gigabytes': 1000 ** 3, + 'gibibytes': 1024 ** 3, 'TiB': 1024 ** 4, 'TB': 1000 ** 4, 'tB': 1024 ** 4, 'Tb': 1000 ** 4, 'tb': 1000 ** 4, + 
'terabytes': 1000 ** 4, + 'tebibytes': 1024 ** 4, 'PiB': 1024 ** 5, 'PB': 1000 ** 5, 'pB': 1024 ** 5, 'Pb': 1000 ** 5, 'pb': 1000 ** 5, + 'petabytes': 1000 ** 5, + 'pebibytes': 1024 ** 5, 'EiB': 1024 ** 6, 'EB': 1000 ** 6, 'eB': 1024 ** 6, 'Eb': 1000 ** 6, 'eb': 1000 ** 6, + 'exabytes': 1000 ** 6, + 'exbibytes': 1024 ** 6, 'ZiB': 1024 ** 7, 'ZB': 1000 ** 7, 'zB': 1024 ** 7, 'Zb': 1000 ** 7, 'zb': 1000 ** 7, + 'zettabytes': 1000 ** 7, + 'zebibytes': 1024 ** 7, 'YiB': 1024 ** 8, 'YB': 1000 ** 8, 'yB': 1024 ** 8, 'Yb': 1000 ** 8, 'yb': 1000 ** 8, + 'yottabytes': 1000 ** 8, + 'yobibytes': 1024 ** 8, } return lookup_unit_table(_UNIT_TABLE, s)