diff --git a/test/test_utils.py b/test/test_utils.py index 5a0109977..9a3a8ddff 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -641,8 +641,8 @@ class TestUtil(unittest.TestCase): self.assertEqual(extract_attributes(''), {'x': 'y'}) self.assertEqual(extract_attributes(''), {'x': '&'}) # XML self.assertEqual(extract_attributes(''), {'x': '"'}) - self.assertEqual(extract_attributes(''), {'x': '£'}) # HTML 3.2 - self.assertEqual(extract_attributes(''), {'x': 'λ'}) # HTML 4.0 + self.assertEqual(extract_attributes(''), {'x': '£'}) # HTML 3.2 + self.assertEqual(extract_attributes(''), {'x': 'λ'}) # HTML 4.0 self.assertEqual(extract_attributes(''), {'x': '&foo'}) self.assertEqual(extract_attributes(''), {'x': "'"}) self.assertEqual(extract_attributes(''), {'x': '"'}) @@ -654,7 +654,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(extract_attributes(''), {'x': 'y'}) self.assertEqual(extract_attributes(""), {'x': 'y'}) self.assertEqual(extract_attributes(''), {'x': '\ny\n'}) - self.assertEqual(extract_attributes(''), {'caps': 'x'}) # Names lowercased + self.assertEqual(extract_attributes(''), {'caps': 'x'}) # Names lowercased self.assertEqual(extract_attributes(''), {'x': '2'}) self.assertEqual(extract_attributes(''), {'x': '2'}) self.assertEqual(extract_attributes(''), {'_:funny-name1': '1'}) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index ec186918c..8ec1bd469 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -273,15 +273,17 @@ def get_element_by_attribute(attribute, value, html): return unescapeHTML(res) + class HTMLAttributeParser(compat_HTMLParser): """Trivial HTML parser to gather the attributes for a single element""" def __init__(self): - self.attrs = { } + self.attrs = {} compat_HTMLParser.__init__(self) def handle_starttag(self, tag, attrs): self.attrs = dict(attrs) + def extract_attributes(html_element): """Given a string for an HTML element such as