[nhl] Add an extractor for videocenter's categories (#1586)

It downloads the last 12 videos.
master
Jaime Marquínez Ferrándiz 11 years ago
parent 9026dd3858
commit 91dbaef406

@ -16,6 +16,7 @@ from youtube_dl.extractor import (
UstreamChannelIE, UstreamChannelIE,
SoundcloudUserIE, SoundcloudUserIE,
LivestreamIE, LivestreamIE,
NHLVideocenterIE,
) )
from youtube_dl.utils import * from youtube_dl.utils import *
@ -74,5 +75,14 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], u'TEDCity2.0 (English)') self.assertEqual(result['title'], u'TEDCity2.0 (English)')
self.assertTrue(len(result['entries']) >= 4) self.assertTrue(len(result['entries']) >= 4)
def test_nhl_videocenter(self):
    """Extract a videocenter category URL and check the resulting playlist.

    The expected values (id u'999', title u'Highlights', 12 entries) are
    compared against what the extractor returns for the URL below.
    """
    extractor = NHLVideocenterIE(FakeYDL())
    playlist = extractor.extract(
        'http://video.canucks.nhl.com/videocenter/console?catid=999')
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['id'], u'999')
    self.assertEqual(playlist['title'], u'Highlights')
    # 12 matches the 'count' value the extractor sends to the browse servlet.
    self.assertEqual(len(playlist['entries']), 12)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

@ -81,7 +81,7 @@ from .naver import NaverIE
from .nba import NBAIE from .nba import NBAIE
from .nbc import NBCNewsIE from .nbc import NBCNewsIE
from .newgrounds import NewgroundsIE from .newgrounds import NewgroundsIE
from .nhl import NHLIE from .nhl import NHLIE, NHLVideocenterIE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .orf import ORFIE from .orf import ORFIE
from .pbs import PBSIE from .pbs import PBSIE

@ -11,7 +11,40 @@ from ..utils import (
) )
class NHLBaseInfoExtractor(InfoExtractor):
    """Helpers shared by the NHL extractors defined in this module."""

    @staticmethod
    def _fix_json(json_string):
        """Return json_string with each backslash-escaped single quote
        replaced by a plain single quote.

        Callers run this on the servlet response before handing it to
        json.loads.
        """
        return json_string.replace('\\\'', '\'')

    def _extract_video(self, info):
        """Build the result dictionary for a single video.

        info is one decoded entry of the playlist/browse JSON.  The keys
        'id', 'name' and 'publishPoint' are required and raise KeyError when
        absent.  'description', 'duration', 'bigImage' and 'releaseDate' are
        treated as optional: a missing key yields None for the corresponding
        field instead of aborting the extraction.

        Returns a dict with the keys 'id', 'title', 'url', 'ext',
        'description', 'duration', 'thumbnail' and 'upload_date'.
        """
        video_id = info['id']
        self.report_extraction(video_id)

        initial_video_url = info['publishPoint']
        # The final url is obtained from the encryptvideopath servlet, which
        # is queried with the '_sd.mp4' variant of the publish point.
        data = compat_urllib_parse.urlencode({
            'type': 'fvod',
            'path': initial_video_url.replace('.mp4', '_sd.mp4'),
        })
        path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
        path_response = self._download_webpage(
            path_url, video_id, u'Downloading final video url')
        path_doc = xml.etree.ElementTree.fromstring(path_response)
        video_url = path_doc.find('path').text

        # Optional metadata: read with .get so that one absent field does
        # not make the whole video (or a whole playlist) fail.
        duration = info.get('duration')
        big_image = info.get('bigImage')
        release_date = info.get('releaseDate')

        thumbnail = None
        if big_image is not None:
            join = compat_urlparse.urljoin
            # The image path is resolved against '/u/' on the video host.
            thumbnail = join(join(video_url, '/u/'), big_image)

        upload_date = None
        if release_date is not None:
            # Only the part before the first '.' is passed to the parser.
            upload_date = unified_strdate(release_date.split('.')[0])

        return {
            'id': video_id,
            'title': info['name'],
            'url': video_url,
            'ext': determine_ext(video_url),
            'description': info.get('description'),
            'duration': int(duration) if duration is not None else None,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }
class NHLIE(NHLBaseInfoExtractor):
IE_NAME = u'nhl.com' IE_NAME = u'nhl.com'
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P<id>\d+)' _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P<id>\d+)'
@ -32,28 +65,56 @@ class NHLIE(InfoExtractor):
json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
info_json = self._download_webpage(json_url, video_id, info_json = self._download_webpage(json_url, video_id,
u'Downloading info json') u'Downloading info json')
info_json = info_json.replace('\\\'', '\'') info_json = self._fix_json(info_json)
info = json.loads(info_json)[0] info = json.loads(info_json)[0]
return self._extract_video(info)
class NHLVideocenterIE(NHLBaseInfoExtractor):
    """Playlist extractor for a team videocenter page or category."""

    IE_NAME = u'nhl.com:videocenter'
    IE_DESC = u'Download the first 12 videos from a videocenter category'
    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'

    @classmethod
    def suitable(cls, url):
        """Return False for urls NHLIE accepts, otherwise defer to the
        default _VALID_URL based check."""
        if NHLIE.suitable(url):
            return False
        return super(NHLVideocenterIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist dict for the category shown on the page at url.

        The category id and the playlist title are scraped from the page,
        then the browse servlet is queried for the videos of that category
        and every returned entry is converted with _extract_video.
        """
        mobj = re.match(self._VALID_URL, url)
        team = mobj.group('team')
        webpage = self._download_webpage(url, team)
        cat_id = self._search_regex(
            [r'var defaultCatId = "(.+?)";',
             r'{statusIndex:0,index:0,.*?id:(.*?),'],
            webpage, u'category id')
        # cat_id comes from the page, so escape it before embedding it in a
        # pattern: it must be matched literally.
        playlist_title = self._html_search_regex(
            r'\?catid=%s">(.*?)</a>' % re.escape(cat_id),
            webpage, u'playlist title', flags=re.DOTALL)

        data = compat_urllib_parse.urlencode({
            'cid': cat_id,
            # This is the default value
            'count': 12,
            'ptrs': 3,
            'format': 'json',
        })
        path = '/videocenter/servlets/browse?' + data
        request_url = compat_urlparse.urljoin(url, path)
        response = self._download_webpage(request_url, playlist_title)
        response = self._fix_json(response)
        if not response.strip():
            # Retry once with the extra "newvideos" parameter when the first
            # answer is blank.
            self._downloader.report_warning(u'Got an empty reponse, trying '
                                            u'adding the "newvideos" parameter')
            response = self._download_webpage(request_url + '&newvideos=true',
                                              playlist_title)
            response = self._fix_json(response)
        videos = json.loads(response)

        return {
            '_type': 'playlist',
            'title': playlist_title,
            'id': cat_id,
            'entries': [self._extract_video(i) for i in videos],
        }

Loading…
Cancel
Save