From 1a117a77287e7dbd4d92f29062dabcf4efb86cb5 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 24 Jul 2015 12:00:20 +0100 Subject: [PATCH] [clipfish] extract mp4 video link --- youtube_dl/extractor/clipfish.py | 37 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py index a5c3cb7c6..09dfaac60 100644 --- a/youtube_dl/extractor/clipfish.py +++ b/youtube_dl/extractor/clipfish.py @@ -1,13 +1,11 @@ from __future__ import unicode_literals -import re -import time -import xml.etree.ElementTree - from .common import InfoExtractor from ..utils import ( ExtractorError, - parse_duration, + int_or_none, + js_to_json, + determine_ext, ) @@ -17,37 +15,40 @@ class ClipfishIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P[0-9]+)/' _TEST = { 'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/', - 'md5': '2521cd644e862936cf2e698206e47385', + 'md5': '79bc922f3e8a9097b3d68a93780fd475', 'info_dict': { 'id': '3966754', 'ext': 'mp4', 'title': 'FIFA 14 - E3 2013 Trailer', 'duration': 82, - }, - 'skip': 'Blocked in the US' + } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_info = self._parse_json( + js_to_json(self._html_search_regex('var videoObject = ({[^}]+?})', webpage, 'videoObject')), + video_id + ) + info_url = self._parse_json( + js_to_json(self._html_search_regex('var globalFlashvars = ({[^}]+?})', webpage, 'globalFlashvars')), + video_id + )['data'] - info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' % - (video_id, int(time.time()))) doc = self._download_xml( info_url, video_id, note='Downloading info page') title = doc.find('title').text video_url = doc.find('filename').text - if video_url is None: - xml_bytes = xml.etree.ElementTree.tostring(doc) - raise ExtractorError('Cannot find video URL in document %r' % - xml_bytes) thumbnail = doc.find('imageurl').text - duration = parse_duration(doc.find('duration').text) + duration = int_or_none(video_info['length']) + formats = [{'url': video_info['videourl']},{'url': video_url}] + self._sort_formats(formats) return { 'id': video_id, 'title': title, - 'url': video_url, + 'formats': formats, 'thumbnail': thumbnail, 'duration': duration, }