From dab647a7b66646f82b12638729b36df02609f75d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 5 Nov 2014 21:32:46 +0700 Subject: [PATCH] [cinemassacre] Keep both extraction approaches and make more robust (Closes #4109) --- youtube_dl/extractor/cinemassacre.py | 78 +++++++++++++++++----------- 1 file changed, 47 insertions(+), 31 deletions(-) diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index 806e378b1..31fe906b4 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -42,11 +42,12 @@ class CinemassacreIE(InfoExtractor): webpage = self._download_webpage(url, display_id) video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') - mobj = re.search(r'src="(?Phttp://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?:Cinemassacre-)?(?P.+?))"', webpage) + mobj = re.search(r'src="(?Phttp://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P(?:Cinemassacre-)?(?P.+?)))"', webpage) if not mobj: raise ExtractorError('Can\'t extract embed url and video id') playerdata_url = mobj.group('embed_url') video_id = mobj.group('video_id') + full_video_id = mobj.group('full_video_id') video_title = self._html_search_regex( r'(?P<title>.+?)\|', webpage, 'title') @@ -60,37 +61,52 @@ class CinemassacreIE(InfoExtractor): vidurl = self._search_regex( r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/') - videolist_url = self._search_regex( - r"file\s*:\s*'(http.+?/jwplayer\.smil)'", playerdata, 'jwplayer.smil') - videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML') + videolist_url = None - formats = [] - baseurl = vidurl[:vidurl.rfind('/')+1] - for video in videolist.findall('.//video'): - src = video.get('src') - if not src: - continue - file_ = src.partition(':')[-1] - width = int_or_none(video.get('width')) - height = int_or_none(video.get('height')) - bitrate = int_or_none(video.get('system-bitrate')) - format = { - 'url': baseurl + file_, - 'format_id': src.rpartition('.')[0].rpartition('_')[-1], - } - if width or height: - format.update({ - 'tbr': bitrate // 1000 if bitrate else None, - 'width': width, - 'height': height, - }) - else: - format.update({ - 'abr': bitrate // 1000 if bitrate else None, - 'vcodec': 'none', - }) - formats.append(format) - self._sort_formats(formats) + mobj = re.search(r"'videoserver'\s*:\s*'(?P<videoserver>[^']+)'", playerdata) + if mobj: + videoserver = mobj.group('videoserver') + mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata) + vidid = mobj.group('vidid') if mobj else full_video_id + videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid) + else: + mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata) + if mobj: + videolist_url = mobj.group('smil') + + if videolist_url: + videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML') + formats = [] + baseurl = vidurl[:vidurl.rfind('/')+1] + for video in videolist.findall('.//video'): + src = video.get('src') + if not src: + continue + file_ = src.partition(':')[-1] + width = int_or_none(video.get('width')) + height = int_or_none(video.get('height')) + bitrate = int_or_none(video.get('system-bitrate')) + format = { + 'url': baseurl + file_, + 'format_id': src.rpartition('.')[0].rpartition('_')[-1], + } + if width or height: + format.update({ + 'tbr': bitrate // 1000 if bitrate else None, + 'width': width, + 'height': height, + }) + else: + format.update({ + 'abr': bitrate // 1000 if bitrate else None, + 'vcodec': 'none', + }) + formats.append(format) + self._sort_formats(formats) + else: + formats = [{ + 'url': vidurl, + }] return { 'id': video_id,