From 59ae56fad5dbbc7178024e3e4a539f3cc6bc60d5 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Thu, 11 Jul 2013 16:12:08 +0200
Subject: [PATCH] Add helper function find_xpath_attr

---
 test/test_utils.py  | 15 +++++++++++++++
 youtube_dl/utils.py | 14 ++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/test/test_utils.py b/test/test_utils.py
index c4b71362e..be1069105 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -4,6 +4,7 @@
 
 import sys
 import unittest
+import xml.etree.ElementTree
 
 # Allow direct execution
 import os
@@ -16,6 +17,7 @@ from youtube_dl.utils import unescapeHTML
 from youtube_dl.utils import orderedSet
 from youtube_dl.utils import DateRange
 from youtube_dl.utils import unified_strdate
+from youtube_dl.utils import find_xpath_attr
 
 if sys.version_info < (3, 0):
     _compat_str = lambda b: b.decode('unicode-escape')
@@ -112,5 +114,18 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
         self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
 
+    def test_find_xpath_attr(self):
+        testxml = u'''<root>
+            <node/>
+            <node x="a"/>
+            <node x="a" y="c" />
+            <node x="b" y="d" />
+        </root>'''
+        doc = xml.etree.ElementTree.fromstring(testxml)
+
+        self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None)
+        self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1])
+        self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index b9bff5fde..76fa2950c 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -198,6 +198,20 @@ else:
         with open(fn, 'w', encoding='utf-8') as f:
             json.dump(obj, f)
 
+if sys.version_info >= (2,7):
+    def find_xpath_attr(node, xpath, key, val):
+        """ Find the xpath xpath[@key=val] """
+        assert re.match(r'^[a-z]+$', key)
+        assert re.match(r'^[a-z]*$', val)
+        expr = xpath + u"[@%s='%s']" % (key, val)
+        return node.find(expr)
+else:
+    def find_xpath_attr(node, xpath, key, val):
+        for f in node.findall(xpath):
+            if f.attrib.get(key) == val:
+                return f
+        return None
+
 def htmlentity_transform(matchobj):
     """Transforms an HTML entity to a character.
 