From 1846e9ade0fb9508459282a992539c700aa26f9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 May 2016 22:31:08 +0600 Subject: [PATCH] [localnews8] Fix extractor (Closes #9539) --- youtube_dl/extractor/localnews8.py | 38 ++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/localnews8.py b/youtube_dl/extractor/localnews8.py index b38d1d58a..aad396135 100644 --- a/youtube_dl/extractor/localnews8.py +++ b/youtube_dl/extractor/localnews8.py @@ -1,29 +1,47 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor class LocalNews8IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?localnews8\.com/.+?/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?localnews8\.com/(?:[^/]+/)*(?P[^/]+)/(?P[0-9]+)' _TEST = { 'url': 'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304', - 'md5': '477bdb188f177788c65db27ecb56649b', + 'md5': 'be4d48aea61aa2bde7be2ee47691ad20', 'info_dict': { 'id': '35183304', + 'display_id': 'rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings', 'ext': 'mp4', 'title': 'Rexburg business turns carbon fiber scraps into wedding ring', 'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.', - 'duration': '153', - 'timestamp': '1441844822', + 'duration': 153, + 'timestamp': 1441844822, + 'upload_date': '20150910', 'uploader_id': 'api', - }} + } + } def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') + + webpage = self._download_webpage(url, display_id) - partner_id = self._search_regex(r'partnerId\s*:\s*"(\d+)"', webpage, video_id) - kaltura_id = self._search_regex(r'var\s+videoIdString\s*=\s*"kaltura:(.+)";', webpage, video_id) + partner_id = self._search_regex( + r'partnerId\s*[:=]\s*(["\'])(?P\d+)\1', + webpage, 'partner id', group='id') + kaltura_id = self._search_regex( + r'videoIdString\s*[:=]\s*(["\'])kaltura:(?P[0-9a-z_]+)\1', + webpage, 'videl id', group='id') - return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura') + return { + '_type': 'url_transparent', + 'url': 'kaltura:%s:%s' % (partner_id, kaltura_id), + 'ie_key': 'Kaltura', + 'id': video_id, + 'display_id': display_id, + }