From d87ec897e99d503e30fb677b0b4acf7256fa6143 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Feb 2015 23:03:12 +0600 Subject: [PATCH] [gamekings] Improve extraction --- youtube_dl/extractor/gamekings.py | 64 ++++++++++++++----------------- 1 file changed, 29 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index 1821f26d8..929779f60 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -1,70 +1,64 @@ +# coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( xpath_text, - xpath_with_ns - ) + xpath_with_ns, +) class GamekingsIE(InfoExtractor): - _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)' - _TESTS = [ - { + _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<id>[^/]+)' + _TESTS = [{ 'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/', # MD5 is flaky, seems to change regularly # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3', 'info_dict': { - 'id': '20130811', + 'id': 'phoenix-wright-ace-attorney-dual-destinies-review', 'ext': 'mp4', 'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review', 'description': 'md5:36fd701e57e8c15ac8682a2374c99731', - } + 'thumbnail': 're:^https?://.*\.jpg$', }, - { + }, { + # vimeo video 'url': 'http://www.gamekings.tv/videos/the-legend-of-zelda-majoras-mask/', + 'md5': '12bf04dfd238e70058046937657ea68d', 'info_dict': { - 'id': '118933752', + 'id': 'the-legend-of-zelda-majoras-mask', 'ext': 'mp4', 'title': 'The Legend of Zelda: Majora’s Mask', - 'description': 'md5:9917825fe0e9f4057601fe1e38860de3' - } - } - ] + 'description': 'md5:9917825fe0e9f4057601fe1e38860de3', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }] def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) - mobj = re.match(self._VALID_URL, url) - name = 
mobj.group('name') - webpage = self._download_webpage(url, name) + playlist_id = self._search_regex( + r'gogoVideo\(\s*\d+\s*,\s*"([^"]+)', webpage, 'playlist id') - playlist_id = re.search(r'(?:gogoVideo)\(\d+,"?(?P<playlist_id>.*)"', webpage, re.MULTILINE).group('playlist_id') - playlist_url = 'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=' + playlist_id - playlist_rss = self._download_xml(playlist_url, playlist_id) - + playlist = self._download_xml( + 'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id, + video_id) NS_MAP = { 'jwplayer': 'http://rss.jwpcdn.com/' - } + } + + item = playlist.find('./channel/item') - item = playlist_rss.find('./channel/item') - - image = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'image') - file_node = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)) - - video_url = file_node.get('file') - video = re.search(r'[0-9]+', video_url) - video_id = video.group(0) - - # Todo: Add medium format + thumbnail = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'thumbnail') + video_url = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)).get('file') return { 'id': video_id, - 'ext': 'mp4', 'url': video_url, 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), - 'thumbnail': image + 'thumbnail': thumbnail, }