From 2e1fa03bf5b165e930dd68278360b53036326cd6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Thu, 10 Oct 2013 15:25:11 +0200
Subject: [PATCH] Add an extractor for video.nhl.com (closes #1586)

---
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/nhl.py      | 59 ++++++++++++++++++++++++++++++++
 youtube_dl/utils.py              |  1 +
 3 files changed, 61 insertions(+)
 create mode 100644 youtube_dl/extractor/nhl.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index c01de6b5e..f44468d35 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -80,6 +80,7 @@ from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import NBCNewsIE
 from .newgrounds import NewgroundsIE
+from .nhl import NHLIE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
 from .pbs import PBSIE
diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py
new file mode 100644
index 000000000..f86d9de7e
--- /dev/null
+++ b/youtube_dl/extractor/nhl.py
@@ -0,0 +1,59 @@
+import re
+import json
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urlparse,
+    compat_urllib_parse,
+    determine_ext,
+    unified_strdate,
+)
+
+
+class NHLIE(InfoExtractor):
+    IE_NAME = u'nhl.com'
+    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
+        u'file': u'453614.mp4',
+        u'info_dict': {
+            u'title': u'Quick clip: Weise 4-3 goal vs Flames',
+            u'description': u'Dale Weise scores his first of the season to put the Canucks up 4-3.',
+            u'duration': 18,
+            u'upload_date': u'20131006',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
+        info_json = self._download_webpage(json_url, video_id,
+            u'Downloading info json')
+        info_json = info_json.replace('\\\'', '\'')
+        info = json.loads(info_json)[0]
+
+        initial_video_url = info['publishPoint']
+        data = compat_urllib_parse.urlencode({
+            'type': 'fvod',
+            'path': initial_video_url.replace('.mp4', '_sd.mp4'),
+        })
+        path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
+        path_response = self._download_webpage(path_url, video_id,
+            u'Downloading final video url')
+        path_doc = xml.etree.ElementTree.fromstring(path_response)
+        video_url = path_doc.find('path').text
+
+        join = compat_urlparse.urljoin
+        return {
+            'id': video_id,
+            'title': info['name'],
+            'url': video_url,
+            'ext': determine_ext(video_url),
+            'description': info['description'],
+            'duration': int(info['duration']),
+            'thumbnail': join(join(video_url, '/u/'), info['bigImage']),
+            'upload_date': unified_strdate(info['releaseDate'].split('.')[0]),
+        }
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index de2654762..82a1daeb9 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -715,6 +715,7 @@ def unified_strdate(date_str):
         '%Y/%m/%d %H:%M:%S',
         '%d.%m.%Y %H:%M',
         '%Y-%m-%dT%H:%M:%SZ',
+        '%Y-%m-%dT%H:%M:%S',
     ]
     for expression in format_expressions:
         try: