From 17b598d30cae2c287f3556f874ddf0fc5d028aec Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 30 Jan 2016 21:05:55 +0800 Subject: [PATCH] [common] _parse_dash_manifest() from youtube.py --- youtube_dl/extractor/common.py | 52 +++++++++++++++++++++++++++++++++ youtube_dl/extractor/youtube.py | 52 --------------------------------- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b3d57dfce..7ad255672 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1330,6 +1330,58 @@ class InfoExtractor(object): }) return entries + def _parse_dash_manifest(self, video_id, dash_doc, fatal=True): + formats = [] + for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'): + mime_type = a.attrib.get('mimeType') + for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'): + url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') + if url_el is None: + continue + if mime_type == 'text/vtt': + # TODO implement WebVTT downloading + pass + elif mime_type.startswith('audio/') or mime_type.startswith('video/'): + segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList') + format_id = r.attrib['id'] + video_url = url_el.text + filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) + f = { + 'format_id': format_id, + 'url': video_url, + 'width': int_or_none(r.attrib.get('width')), + 'height': int_or_none(r.attrib.get('height')), + 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), + 'asr': int_or_none(r.attrib.get('audioSamplingRate')), + 'filesize': filesize, + 'fps': int_or_none(r.attrib.get('frameRate')), + } + if segment_list is not None: + f.update({ + 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'], + 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')], + 'protocol': 'http_dash_segments', + }) + try: + existing_format = next( + fo for fo in formats + if fo['format_id'] == format_id) + except StopIteration: + full_info = self._formats.get(format_id, {}).copy() + full_info.update(f) + codecs = r.attrib.get('codecs') + if codecs: + if full_info.get('acodec') == 'none': + full_info['vcodec'] = codecs + elif full_info.get('vcodec') == 'none': + full_info['acodec'] = codecs + formats.append(full_info) + else: + existing_format.update(f) + else: + self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) + return formats + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 08b7e15c4..acd5cf2c3 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1035,58 +1035,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') - def _parse_dash_manifest(self, video_id, dash_doc, fatal=True): - formats = [] - for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'): - mime_type = a.attrib.get('mimeType') - for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'): - url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') - if url_el is None: - continue - if mime_type == 'text/vtt': - # TODO implement WebVTT downloading - pass - elif mime_type.startswith('audio/') or mime_type.startswith('video/'): - segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList') - format_id = r.attrib['id'] - video_url = url_el.text - filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) - f = { - 'format_id': format_id, - 'url': video_url, - 'width': int_or_none(r.attrib.get('width')), - 'height': int_or_none(r.attrib.get('height')), - 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), - 'asr': int_or_none(r.attrib.get('audioSamplingRate')), - 'filesize': filesize, - 'fps': int_or_none(r.attrib.get('frameRate')), - } - if segment_list is not None: - f.update({ - 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'], - 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')], - 'protocol': 'http_dash_segments', - }) - try: - existing_format = next( - fo for fo in formats - if fo['format_id'] == format_id) - except StopIteration: - full_info = self._formats.get(format_id, {}).copy() - full_info.update(f) - codecs = r.attrib.get('codecs') - if codecs: - if full_info.get('acodec') == 'none': - full_info['vcodec'] = codecs - elif full_info.get('vcodec') == 'none': - full_info['acodec'] = codecs - formats.append(full_info) - else: - existing_format.update(f) - else: - self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) - return formats - def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {})