From e7b6d12254702a4aa6a9f54420f80e6ea456b120 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 30 Sep 2014 11:12:59 +0200 Subject: [PATCH] [utils] Improve and test js_to_json --- test/test_utils.py | 20 +++++++++++++++--- youtube_dl/extractor/common.py | 6 +++++- youtube_dl/utils.py | 37 +++++++++++++++------------------- 3 files changed, 38 insertions(+), 25 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 113aa44b2..bcca0efea 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -332,14 +332,28 @@ class TestUtil(unittest.TestCase): ) self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') - def test_js_to_json(self): + def test_js_to_json_realworld(self): inp = '''{ - 'clip':{'provider':'pseudo'} + 'clip':{'provider':'pseudo'} }''' self.assertEqual(js_to_json(inp), '''{ - "clip":{"provider":"pseudo"} + "clip":{"provider":"pseudo"} }''') json.loads(js_to_json(inp)) + inp = '''{ + 'playlist':[{'controls':{'all':null}}] + }''' + self.assertEqual(js_to_json(inp), '''{ + "playlist":[{"controls":{"all":null}}] + }''') + + def test_js_to_json_edgecases(self): + on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") + self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) + + on = js_to_json('{"abc": true}') + self.assertEqual(json.loads(on), {'abc': True}) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f43a0a569..611cf95f1 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -334,7 +334,11 @@ class InfoExtractor(object): try: return json.loads(json_string) except ValueError as ve: - raise ExtractorError('Failed to download JSON', cause=ve) + errmsg = '%s: Failed to parse JSON ' % video_id + if fatal: + raise ExtractorError(errmsg, cause=ve) + else: + self.report_warning(errmsg + str(ve)) def report_warning(self, msg, video_id=None): idstr = '' if video_id is None else '%s: ' % video_id diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 59851a8c0..f8dd9c72d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1580,29 +1580,24 @@ def strip_jsonp(code): def js_to_json(code): def fix_kv(m): - key = m.group(2) - if key.startswith("'"): - assert key.endswith("'") - assert '"' not in key - key = '"%s"' % key[1:-1] - elif not key.startswith('"'): - key = '"%s"' % key - - value = m.group(4) - if value.startswith("'"): - assert value.endswith("'") - assert '"' not in value - value = '"%s"' % value[1:-1] - - return m.group(1) + key + m.group(3) + value + v = m.group(0) + if v in ('true', 'false', 'null'): + return v + if v.startswith('"'): + return v + if v.startswith("'"): + v = v[1:-1] + v = re.sub(r"\\\\|\\'|\"", lambda m: { + '\\\\': '\\\\', + "\\'": "'", + '"': '\\"', + }[m.group(0)], v) + return '"%s"' % v res = re.sub(r'''(?x) - ([{,]\s*) - ("[^"]*"|\'[^\']*\'|[a-z0-9A-Z]+) - (:\s*) - ([0-9.]+|true|false|"[^"]*"|\'[^\']*\'| - (?=\[|\{) - ) + "(?:[^"\\]*(?:\\\\|\\")?)*"| + '(?:[^'\\]*(?:\\\\|\\')?)*'| + [a-zA-Z_][a-zA-Z_0-9]* ''', fix_kv, code) res = re.sub(r',(\s*\])', lambda m: m.group(1), res) return res