From 8cc3eba79ae19cf5ec4780356b75ccb9813916f0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 27 Oct 2014 02:43:59 +0100 Subject: [PATCH] [phoenix] Add new extractor (Fixes #4036) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/phoenix.py | 31 +++++++ youtube_dl/extractor/zdf.py | 151 ++++++++++++++++--------------- 3 files changed, 109 insertions(+), 74 deletions(-) create mode 100644 youtube_dl/extractor/phoenix.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 17ab49283..3979b8270 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -280,6 +280,7 @@ from .orf import ( from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE +from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .planetaplay import PlanetaPlayIE from .played import PlayedIE diff --git a/youtube_dl/extractor/phoenix.py b/youtube_dl/extractor/phoenix.py new file mode 100644 index 000000000..a20672c0c --- /dev/null +++ b/youtube_dl/extractor/phoenix.py @@ -0,0 +1,31 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from .zdf import extract_from_xml_url + + +class PhoenixIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?phoenix\.de/content/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.phoenix.de/content/884301', + 'md5': 'ed249f045256150c92e72dbb70eadec6', + 'info_dict': { + 'id': '884301', + 'ext': 'mp4', + 'title': 'Michael Krons mit Hans-Werner Sinn', + 'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr', + 'upload_date': '20141025', + 'uploader': 'Im Dialog', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + internal_id = self._search_regex( + r'
[^_]+)_(?P[^_]+)_(?P[^_]+)_ + (?P[^_]+)_(?P[^_]+)_(?P[^_]+) + ''', format_id) + + ext = format_m.group('container') + proto = format_m.group('proto').lower() + + quality = fnode.find('./quality').text + abr = int(fnode.find('./audioBitrate').text) // 1000 + vbr_node = fnode.find('./videoBitrate') + vbr = None if vbr_node is None else int(vbr_node.text) // 1000 + + width_node = fnode.find('./width') + width = None if width_node is None else int_or_none(width_node.text) + height_node = fnode.find('./height') + height = None if height_node is None else int_or_none(height_node.text) + + format_note = '' + if not format_note: + format_note = None + + return { + 'format_id': format_id + '-' + quality, + 'url': video_url, + 'ext': ext, + 'acodec': format_m.group('acodec'), + 'vcodec': format_m.group('vcodec'), + 'abr': abr, + 'vbr': vbr, + 'width': width, + 'height': height, + 'filesize': int_or_none(fnode.find('./filesize').text), + 'format_note': format_note, + 'protocol': proto, + '_available': is_available, + } + + format_nodes = doc.findall('.//formitaeten/formitaet') + formats = list(filter( + lambda f: f['_available'], + map(xml_to_format, format_nodes))) + ie._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'upload_date': upload_date, + 'formats': formats, + } + + class ZDFIE(InfoExtractor): _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P#)?/(.*beitrag/(?:video/)?)(?P[0-9]+)(?:/[^/?]+)?(?:\?.*)?' @@ -32,77 +108,4 @@ class ZDFIE(InfoExtractor): video_id = self._match_id(url) xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id - doc = self._download_xml( - xml_url, video_id, - note='Downloading video info', - errnote='Failed to download video info') - - title = doc.find('.//information/title').text - description = doc.find('.//information/detail').text - duration = int(doc.find('.//details/lengthSec').text) - uploader_node = doc.find('.//details/originChannelTitle') - uploader = None if uploader_node is None else uploader_node.text - uploader_id_node = doc.find('.//details/originChannelId') - uploader_id = None if uploader_id_node is None else uploader_id_node.text - upload_date = unified_strdate(doc.find('.//details/airtime').text) - - def xml_to_format(fnode): - video_url = fnode.find('url').text - is_available = 'http://www.metafilegenerator' not in video_url - - format_id = fnode.attrib['basetype'] - format_m = re.match(r'''(?x) - (?P[^_]+)_(?P[^_]+)_(?P[^_]+)_ - (?P[^_]+)_(?P[^_]+)_(?P[^_]+) - ''', format_id) - - ext = format_m.group('container') - proto = format_m.group('proto').lower() - - quality = fnode.find('./quality').text - abr = int(fnode.find('./audioBitrate').text) // 1000 - vbr_node = fnode.find('./videoBitrate') - vbr = None if vbr_node is None else int(vbr_node.text) // 1000 - - width_node = fnode.find('./width') - width = None if width_node is None else int_or_none(width_node.text) - height_node = fnode.find('./height') - height = None if height_node is None else int_or_none(height_node.text) - - format_note = '' - if not format_note: - format_note = None - - return { - 'format_id': format_id + '-' + quality, - 'url': video_url, - 'ext': ext, - 'acodec': format_m.group('acodec'), - 'vcodec': format_m.group('vcodec'), - 'abr': abr, - 'vbr': vbr, - 'width': width, - 'height': height, - 'filesize': int_or_none(fnode.find('./filesize').text), - 'format_note': format_note, - 'protocol': proto, - '_available': is_available, - } - - format_nodes = doc.findall('.//formitaeten/formitaet') - formats = list(filter( - lambda f: f['_available'], - map(xml_to_format, format_nodes))) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'upload_date': upload_date, - 'formats': formats, - } \ No newline at end of file + return extract_from_xml_url(self, video_id, xml_url)