From bec2248141c9cc3e44794d7ee48a6bcfc4904eac Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Wed, 7 Jan 2015 11:43:36 +0100
Subject: [PATCH] [InfoExtractor/common] Correct and test meta tag matching

---
 test/test_InfoExtractor.py     | 18 ++++++++++++++++++
 youtube_dl/extractor/common.py |  2 +-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 13c18ed95..be8d12997 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -40,5 +40,23 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
         self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
 
+    def test_html_search_meta(self):
+        ie = self.ie
+        html = '''
+            <meta name="a" content="1" />
+            <meta name='b' content='2'>
+            <meta name="c" content='3'>
+            <meta name=d content='4'>
+            <meta property="e" content='5' >
+            <meta content="6" name="f">
+        '''
+
+        self.assertEqual(ie._html_search_meta('a', html), '1')
+        self.assertEqual(ie._html_search_meta('b', html), '2')
+        self.assertEqual(ie._html_search_meta('c', html), '3')
+        self.assertEqual(ie._html_search_meta('d', html), '4')
+        self.assertEqual(ie._html_search_meta('e', html), '5')
+        self.assertEqual(ie._html_search_meta('f', html), '6')
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index df32b5ca0..d703893dc 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -594,7 +594,7 @@ class InfoExtractor(object):
         return self._html_search_regex(
             r'''(?isx)<meta
                     (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
-                    [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
+                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
             html, display_name, fatal=fatal, group='content', **kwargs)
 
     def _dc_search_uploader(self, html):