From 7fe1592073c0a775dcd3ea7fcb400fbcfad624f7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 20 Dec 2016 12:23:16 +0100 Subject: [PATCH] [common] fix dash codec information for mixed videos and fragment url construction (#11490) --- test/test_utils.py | 1 + youtube_dl/extractor/common.py | 11 +++-------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 1cdac82fc..3092db5c1 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -461,6 +461,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(urljoin('http://foo.de/', None), None) self.assertEqual(urljoin('http://foo.de/', ''), None) self.assertEqual(urljoin('http://foo.de/', ['foobar']), None) + self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt') def test_parse_age_limit(self): self.assertEqual(parse_age_limit(None), None) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 40f3e2323..58da27025 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -59,6 +59,7 @@ from ..utils import ( parse_m3u8_attributes, extract_attributes, parse_codecs, + urljoin, ) @@ -1631,11 +1632,6 @@ class InfoExtractor(object): extract_Initialization(segment_template) return ms_info - def combine_url(base_url, target_url): - if re.match(r'^https?://', target_url): - return target_url - return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url) - mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration')) formats = [] for period in mpd_doc.findall(_add_ns('Period')): @@ -1685,12 +1681,11 @@ class InfoExtractor(object): 'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000), 'asr': int_or_none(representation_attrib.get('audioSamplingRate')), 'fps': int_or_none(representation_attrib.get('frameRate')), - 'vcodec': 'none' if content_type == 'audio' else representation_attrib.get('codecs'), - 'acodec': 'none' if content_type == 
'video' else representation_attrib.get('codecs'), 'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None, 'format_note': 'DASH %s' % content_type, 'filesize': filesize, } + f.update(parse_codecs(representation_attrib.get('codecs'))) representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info: @@ -1774,7 +1769,7 @@ class InfoExtractor(object): f['fragments'].append({'url': initialization_url}) f['fragments'].extend(representation_ms_info['fragments']) for fragment in f['fragments']: - fragment['url'] = combine_url(base_url, fragment['url']) + fragment['url'] = urljoin(base_url, fragment['url']) try: existing_format = next( fo for fo in formats