From 95f3f7c20a05e7ac490e768b8470b20538ef8581 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 19 Aug 2017 21:40:53 +0800 Subject: [PATCH] [utils] Fix unescapeHTML for misformed string like "&a&quot;" (#13935) --- ChangeLog | 6 ++++++ test/test_utils.py | 1 + youtube_dl/utils.py | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 9a0fad673..9eab4d1e7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Core +* [utils] Fix unescapeHTML for misformed string like "&a&quot;" (#13935) + + version 2017.08.18 Core diff --git a/test/test_utils.py b/test/test_utils.py index 2aab16b97..e50f3764e 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -279,6 +279,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(unescapeHTML('&#47;'), '/') self.assertEqual(unescapeHTML('&eacute;'), 'é') self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;') + self.assertEqual(unescapeHTML('&a&quot;'), '&a"') # HTML5 entities self.assertEqual(unescapeHTML('&period;&apos;'), '.\'') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c9cbd5842..2554a2abd 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -596,7 +596,7 @@ def unescapeHTML(s): assert type(s) == compat_str return re.sub( - r'&([^;]+;)', lambda m: _htmlentity_transform(m.group(1)), s) + r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s) def get_subprocess_encoding():