From 77d2b106ccf81e50cf41a7aa0bb320433a84e110 Mon Sep 17 00:00:00 2001 From: robin Date: Sun, 8 Feb 2015 13:42:41 +0100 Subject: [PATCH 1/9] [Gamekings] Fix 404 when large isn't available When trying to download some GameKings videos, not all worked. This was because not all videos had a "/large"-URL available. The extractor checks now if the /large URL is available, if it isn't, it tries to get the normal URL. --- youtube_dl/extractor/gamekings.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index cf8e90d7d..bac325bd8 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -31,6 +31,8 @@ class GamekingsIE(InfoExtractor): # Todo: add medium format video_url = video_url.replace(video_id, 'large/' + video_id) + if not (self._is_valid_url(video_url, video_id)): + video_url = video_url.replace(video_id + '/large', video_id) return { 'id': video_id, From f23a3ca69975346d5fc30ee09b0cdceb4a384879 Mon Sep 17 00:00:00 2001 From: robin Date: Sun, 8 Feb 2015 13:47:27 +0100 Subject: [PATCH 2/9] [Gamekings] Fixed typo in URL replacement --- youtube_dl/extractor/gamekings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index bac325bd8..1697f5e44 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -32,7 +32,7 @@ class GamekingsIE(InfoExtractor): # Todo: add medium format video_url = video_url.replace(video_id, 'large/' + video_id) if not (self._is_valid_url(video_url, video_id)): - video_url = video_url.replace(video_id + '/large', video_id) + video_url = video_url.replace(video_id + 'large/', video_id) return { 'id': video_id, From 2f38289b79ed6c265f2e7cc91e417d0f2178371a Mon Sep 17 00:00:00 2001 From: robin Date: Sun, 8 Feb 2015 13:49:32 +0100 Subject: [PATCH 3/9] [Gamekings] Fix order of replacement string Oops. 
--- youtube_dl/extractor/gamekings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index 1697f5e44..a5d854ade 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -32,7 +32,7 @@ class GamekingsIE(InfoExtractor): # Todo: add medium format video_url = video_url.replace(video_id, 'large/' + video_id) if not (self._is_valid_url(video_url, video_id)): - video_url = video_url.replace(video_id + 'large/', video_id) + video_url = video_url.replace('large/' + video_id, video_id) return { 'id': video_id, From ba322d82090bd1126774e772b699283121ffa4b8 Mon Sep 17 00:00:00 2001 From: robin Date: Sun, 8 Feb 2015 14:23:37 +0100 Subject: [PATCH 4/9] [Gamekings] Added test and replaced video_url Quick and dirty fix for the Gamekings extractor. It gives an error about the video_url, but it downloads it now instead of giving a 404 error on newer Gamekings videos --- youtube_dl/extractor/gamekings.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index a5d854ade..aa589390a 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -7,7 +7,8 @@ from .common import InfoExtractor class GamekingsIE(InfoExtractor): _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P[0-9a-z\-]+)' - _TEST = { + _TESTS = [ + { 'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/', # MD5 is flaky, seems to change regularly # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3', @@ -16,8 +17,17 @@ class GamekingsIE(InfoExtractor): 'ext': 'mp4', 'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review', 'description': 'md5:36fd701e57e8c15ac8682a2374c99731', + } + }, + { + 'url': 'http://www.gamekings.tv/videos/the-legend-of-zelda-majoras-mask/', + 'info_dict': { + 'id': '118933752', + 'ext': 'mp4', + 'title': 'The 
Legend of Zelda: Majora’s Mask' + } } - } + ] def _real_extract(self, url): @@ -33,6 +43,7 @@ class GamekingsIE(InfoExtractor): video_url = video_url.replace(video_id, 'large/' + video_id) if not (self._is_valid_url(video_url, video_id)): video_url = video_url.replace('large/' + video_id, video_id) + video_url = video_url.replace('http://stream.gamekings.tv/', '') return { 'id': video_id, From 8ca8cbe2bd262676cbe5a63aaee6559dfa795634 Mon Sep 17 00:00:00 2001 From: robin Date: Sun, 8 Feb 2015 14:41:14 +0100 Subject: [PATCH 5/9] [Gamekings] Check string for vimeo, fix test The test now doesn't fail anymore. It just checks the string for having "vimeo" in it, instead of using the method for URL-checking, since it returns an error. The tests don't fail, and the extractor works fine now. --- youtube_dl/extractor/gamekings.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index aa589390a..ba4d2e082 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -24,7 +24,8 @@ class GamekingsIE(InfoExtractor): 'info_dict': { 'id': '118933752', 'ext': 'mp4', - 'title': 'The Legend of Zelda: Majora’s Mask' + 'title': 'The Legend of Zelda: Majora’s Mask', + 'description': 'md5:9917825fe0e9f4057601fe1e38860de3' } } ] @@ -41,7 +42,7 @@ class GamekingsIE(InfoExtractor): # Todo: add medium format video_url = video_url.replace(video_id, 'large/' + video_id) - if not (self._is_valid_url(video_url, video_id)): + if "vimeo" in video_url: video_url = video_url.replace('large/' + video_id, video_id) video_url = video_url.replace('http://stream.gamekings.tv/', '') From 5d678df64a7a4a695da769f7691c86f39fca26bb Mon Sep 17 00:00:00 2001 From: robin Date: Sun, 8 Feb 2015 15:34:37 +0100 Subject: [PATCH 6/9] [Gamekings] Download playlist Todo: URL and Thumbnail should be extracted with XPath --- youtube_dl/extractor/gamekings.py | 13 ++++++++++++- 1 file changed, 12
insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index ba4d2e082..f2760d444 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor - +from ..utils import xpath_text class GamekingsIE(InfoExtractor): _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P[0-9a-z\-]+)' @@ -35,6 +35,17 @@ class GamekingsIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) name = mobj.group('name') webpage = self._download_webpage(url, name) + + playlist_id = re.search(r'(?:gogoVideo)\(\d+,"?(?P.*)"', webpage, re.MULTILINE).group('playlist_id') + playlist_url = 'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=' + playlist_id + playlist_rss = self._download_xml(playlist_url, playlist_id) + + NS_MAP { + 'rss': 'http://rss.jwpcdn.com/' + } + + # Todo: Implement Xpath for searching the video link + video_url = self._og_search_video_url(webpage) video = re.search(r'[0-9]+', video_url) From 955c5505e7df737b452d54d869958a8f5425edf5 Mon Sep 17 00:00:00 2001 From: robin Date: Sun, 8 Feb 2015 16:44:25 +0100 Subject: [PATCH 7/9] [Gamekings] Use xpath XPath is used for extracting the video url and the thumbnail --- youtube_dl/extractor/gamekings.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index f2760d444..3c1f7f195 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -3,7 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import xpath_text +from ..utils import ( + xpath_text, + xpath_with_ns + ) + class GamekingsIE(InfoExtractor): _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P[0-9a-z\-]+)' @@ -39,23 +43,22 @@ class GamekingsIE(InfoExtractor): playlist_id = 
re.search(r'(?:gogoVideo)\(\d+,"?(?P.*)"', webpage, re.MULTILINE).group('playlist_id') playlist_url = 'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=' + playlist_id playlist_rss = self._download_xml(playlist_url, playlist_id) + - NS_MAP { - 'rss': 'http://rss.jwpcdn.com/' + NS_MAP = { + 'jwplayer': 'http://rss.jwpcdn.com/' } - # Todo: Implement Xpath for searching the video link + item = playlist_rss.find('./channel/item') - video_url = self._og_search_video_url(webpage) - + image = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'image') + file_node = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)) + + video_url = file_node.get('file') video = re.search(r'[0-9]+', video_url) video_id = video.group(0) - - # Todo: add medium format - video_url = video_url.replace(video_id, 'large/' + video_id) - if "vimeo" in video_url: - video_url = video_url.replace('large/' + video_id, video_id) - video_url = video_url.replace('http://stream.gamekings.tv/', '') + + # Todo: Add medium format return { 'id': video_id, From c36b09a5026172c1ca452038fffccd68b14c528c Mon Sep 17 00:00:00 2001 From: robin Date: Sun, 8 Feb 2015 16:46:13 +0100 Subject: [PATCH 8/9] [Gamekings] Use thumbnail in return statement --- youtube_dl/extractor/gamekings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index 3c1f7f195..1821f26d8 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -66,4 +66,5 @@ class GamekingsIE(InfoExtractor): 'url': video_url, 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), + 'thumbnail': image } From d87ec897e99d503e30fb677b0b4acf7256fa6143 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Feb 2015 23:03:12 +0600 Subject: [PATCH 9/9] [gamekings] Improve extraction --- youtube_dl/extractor/gamekings.py | 64 ++++++++++++++----------------- 1 file changed, 29 
insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index 1821f26d8..929779f60 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -1,70 +1,64 @@ +# coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( xpath_text, - xpath_with_ns - ) + xpath_with_ns, +) class GamekingsIE(InfoExtractor): - _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P[0-9a-z\-]+)' - _TESTS = [ - { + _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P[^/]+)' + _TESTS = [{ 'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/', # MD5 is flaky, seems to change regularly # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3', 'info_dict': { - 'id': '20130811', + 'id': 'phoenix-wright-ace-attorney-dual-destinies-review', 'ext': 'mp4', 'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review', 'description': 'md5:36fd701e57e8c15ac8682a2374c99731', - } + 'thumbnail': 're:^https?://.*\.jpg$', }, - { + }, { + # vimeo video 'url': 'http://www.gamekings.tv/videos/the-legend-of-zelda-majoras-mask/', + 'md5': '12bf04dfd238e70058046937657ea68d', 'info_dict': { - 'id': '118933752', + 'id': 'the-legend-of-zelda-majoras-mask', 'ext': 'mp4', 'title': 'The Legend of Zelda: Majora’s Mask', - 'description': 'md5:9917825fe0e9f4057601fe1e38860de3' - } - } - ] + 'description': 'md5:9917825fe0e9f4057601fe1e38860de3', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }] def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) - mobj = re.match(self._VALID_URL, url) - name = mobj.group('name') - webpage = self._download_webpage(url, name) + playlist_id = self._search_regex( + r'gogoVideo\(\s*\d+\s*,\s*"([^"]+)', webpage, 'playlist id') - playlist_id = re.search(r'(?:gogoVideo)\(\d+,"?(?P.*)"', webpage, re.MULTILINE).group('playlist_id') - playlist_url = 
'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=' + playlist_id - playlist_rss = self._download_xml(playlist_url, playlist_id) - + playlist = self._download_xml( + 'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id, + video_id) NS_MAP = { 'jwplayer': 'http://rss.jwpcdn.com/' - } + } + + item = playlist.find('./channel/item') - item = playlist_rss.find('./channel/item') - - image = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'image') - file_node = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)) - - video_url = file_node.get('file') - video = re.search(r'[0-9]+', video_url) - video_id = video.group(0) - - # Todo: Add medium format + thumbnail = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'thumbnail') + video_url = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)).get('file') return { 'id': video_id, - 'ext': 'mp4', 'url': video_url, 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), - 'thumbnail': image + 'thumbnail': thumbnail, }