From 0d7f03642976e7859e290b06db41d20a4bfd3a38 Mon Sep 17 00:00:00 2001 From: ping Date: Sat, 16 May 2015 15:43:13 +0800 Subject: [PATCH 1/5] [viki] Add support for shows --- youtube_dl/extractor/__init__.py | 5 ++++- youtube_dl/extractor/viki.py | 33 ++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f293bc2a4..cb6635610 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -639,7 +639,10 @@ from .vine import ( VineIE, VineUserIE, ) -from .viki import VikiIE +from .viki import ( + VikiIE, + VikiShowIE, +) from .vk import ( VKIE, VKUserVideosIE, diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index cf6af1e5c..4d185c0e6 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -145,3 +145,36 @@ class VikiIE(InfoExtractor): 'ext': 'vtt', }] return res + + +class VikiShowIE(InfoExtractor): + IE_NAME = 'viki:show' + _VALID_URL = r'^https?://(?:www\.)?viki\.com/tv/(?P[0-9]+c)' + _TESTS = [{ + 'url': 'http://www.viki.com/tv/50c-boys-over-flowers', + 'info_dict': { + 'id': '50c', + 'title': 'Boys Over Flowers', + 'description': 'md5:ecd3cff47967fe193cff37c0bec52790', + }, + 'playlist_count': 25, + }] + + def _real_extract(self, url): + show_id = self._match_id(url) + show_page = self._download_webpage(url, show_id, 'Download show page') + + title = self._og_search_title(show_page) + description = self._og_search_description(show_page) + + show_json = self._download_json( + 'http://api.viki.io/v4/containers/%s/episodes.json?app=100000a&per_page=999&sort=number&direction=asc' % show_id, + show_id, note='Retrieve show json', errnote='Unable to get show json' + ) + entries = [] + for video in show_json['response']: + video_id = video['id'] + entries.append(self.url_result( + 'http://www.viki.com/videos/%s' % video_id, 'Viki', video_id)) + + return self.playlist_result(entries, show_id, title, description) From 2f3bdab2b90c6695c0a478f352967b0c9da4f23f Mon Sep 17 00:00:00 2001 From: ping Date: Sat, 16 May 2015 15:56:37 +0800 Subject: [PATCH 2/5] [viki] Fix code format --- youtube_dl/extractor/viki.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 4d185c0e6..40a73f561 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -169,8 +169,7 @@ class VikiShowIE(InfoExtractor): show_json = self._download_json( 'http://api.viki.io/v4/containers/%s/episodes.json?app=100000a&per_page=999&sort=number&direction=asc' % show_id, - show_id, note='Retrieve show json', errnote='Unable to get show json' - ) + show_id, note='Retrieve show json', errnote='Unable to get show json') entries = [] for video in show_json['response']: video_id = video['id'] From 1c18de00192d195357989861563cc1fad9256128 Mon Sep 17 00:00:00 2001 From: ping Date: Sun, 17 May 2015 01:38:50 +0800 Subject: [PATCH 3/5] [viki] Add proper paging and include clips --- youtube_dl/extractor/viki.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 40a73f561..4d477b03c 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -93,7 +93,7 @@ class VikiIE(InfoExtractor): 'Video %s is blocked from your location.' % video_id, expected=True) else: - raise ExtractorError('Viki said: ' + err_msg) + raise ExtractorError('Viki said: %s %s' % (err_msg, url)) mobj = re.search( r']+type="(?P[^"]+)"[^>]+src="(?P[^"]+)"', info_webpage) if not mobj: @@ -157,7 +157,15 @@ class VikiShowIE(InfoExtractor): 'title': 'Boys Over Flowers', 'description': 'md5:ecd3cff47967fe193cff37c0bec52790', }, - 'playlist_count': 25, + 'playlist_count': 70, + }, { + 'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete', + 'info_dict': { + 'id': '1354c', + 'title': 'Poor Nastya [COMPLETE]', + 'description': 'md5:05bf5471385aa8b21c18ad450e350525', + }, + 'playlist_count': 127, }] def _real_extract(self, url): @@ -167,13 +175,16 @@ class VikiShowIE(InfoExtractor): title = self._og_search_title(show_page) description = self._og_search_description(show_page) - show_json = self._download_json( - 'http://api.viki.io/v4/containers/%s/episodes.json?app=100000a&per_page=999&sort=number&direction=asc' % show_id, - show_id, note='Retrieve show json', errnote='Unable to get show json') entries = [] - for video in show_json['response']: - video_id = video['id'] - entries.append(self.url_result( - 'http://www.viki.com/videos/%s' % video_id, 'Viki', video_id)) + for video_type in ['episodes', 'clips']: + json_url = 'http://api.viki.io/v4/containers/%s/%s.json?app=100000a&per_page=25&sort=number&direction=asc&with_paging=true&page=1' % (show_id, video_type) + while json_url is not None: + show_json = self._download_json( + json_url, show_id, note='Retrieve show json', errnote='Unable to get show json') + for video in show_json['response']: + video_id = video['id'] + entries.append(self.url_result( + 'http://www.viki.com/videos/%s' % video_id, 'Viki', video_id)) + json_url = show_json['pagination']['next'] return self.playlist_result(entries, show_id, title, description) From 8da0e0e94682faa0463f33d991df70a2402b5a86 Mon Sep 17 00:00:00 2001 From: ping Date: Sun, 17 May 2015 06:19:38 +0800 Subject: [PATCH 4/5] [viki] Change IE name to channel, better message output --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/viki.py | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index cb6635610..21f7b7290 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -641,7 +641,7 @@ from .vine import ( ) from .viki import ( VikiIE, - VikiShowIE, + VikiChannelIE, ) from .vk import ( VKIE, diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 4d477b03c..9bdbdc3e4 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -147,8 +147,8 @@ class VikiIE(InfoExtractor): return res -class VikiShowIE(InfoExtractor): - IE_NAME = 'viki:show' +class VikiChannelIE(InfoExtractor): + IE_NAME = 'viki:channel' _VALID_URL = r'^https?://(?:www\.)?viki\.com/tv/(?P[0-9]+c)' _TESTS = [{ 'url': 'http://www.viki.com/tv/50c-boys-over-flowers', @@ -167,6 +167,7 @@ class VikiShowIE(InfoExtractor): }, 'playlist_count': 127, }] + _PER_PAGE = 25 def _real_extract(self, url): show_id = self._match_id(url) @@ -177,10 +178,12 @@ class VikiShowIE(InfoExtractor): entries = [] for video_type in ['episodes', 'clips']: - json_url = 'http://api.viki.io/v4/containers/%s/%s.json?app=100000a&per_page=25&sort=number&direction=asc&with_paging=true&page=1' % (show_id, video_type) + json_url = 'http://api.viki.io/v4/containers/%s/%s.json?app=100000a&per_page=%d&sort=number&direction=asc&with_paging=true&page=1' % (show_id, video_type, self._PER_PAGE) while json_url is not None: show_json = self._download_json( - json_url, show_id, note='Retrieve show json', errnote='Unable to get show json') + json_url, show_id, + note='Downloading %s json page #%s' % + (video_type, re.search(r'[?&]page=([0-9]+)', json_url).group(1))) for video in show_json['response']: video_id = video['id'] entries.append(self.url_result( From b0d619fde2b187f2b36b077a1eb11d766429f88c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 20 May 2015 21:28:04 +0600 Subject: [PATCH 5/5] [viki:channel] Extract title from JSON --- youtube_dl/extractor/viki.py | 40 +++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 9bdbdc3e4..fc585c299 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -23,7 +23,7 @@ class VikiIE(InfoExtractor): # iPad2 _USER_AGENT = 'Mozilla/5.0(iPad; U; CPU OS 4_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8F191 Safari/6533.18.5' - _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P[0-9]+v)' + _VALID_URL = r'https?://(?:www\.)?viki\.com/videos/(?P[0-9]+v)' _TESTS = [{ 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14', 'info_dict': { @@ -149,7 +149,7 @@ class VikiIE(InfoExtractor): class VikiChannelIE(InfoExtractor): IE_NAME = 'viki:channel' - _VALID_URL = r'^https?://(?:www\.)?viki\.com/tv/(?P[0-9]+c)' + _VALID_URL = r'https?://(?:www\.)?viki\.com/tv/(?P[0-9]+c)' _TESTS = [{ 'url': 'http://www.viki.com/tv/50c-boys-over-flowers', 'info_dict': { @@ -167,27 +167,35 @@ class VikiChannelIE(InfoExtractor): }, 'playlist_count': 127, }] + _API_BASE = 'http://api.viki.io/v4/containers' + _APP = '100000a' _PER_PAGE = 25 def _real_extract(self, url): - show_id = self._match_id(url) - show_page = self._download_webpage(url, show_id, 'Download show page') + channel_id = self._match_id(url) - title = self._og_search_title(show_page) - description = self._og_search_description(show_page) + channel = self._download_json( + '%s/%s.json?app=%s' % (self._API_BASE, channel_id, self._APP), + channel_id, 'Downloading channel JSON') + + titles = channel['titles'] + title = titles.get('en') or titles[titles.keys()[0]] + + descriptions = channel['descriptions'] + description = descriptions.get('en') or descriptions[descriptions.keys()[0]] entries = [] - for video_type in ['episodes', 'clips']: - json_url = 'http://api.viki.io/v4/containers/%s/%s.json?app=100000a&per_page=%d&sort=number&direction=asc&with_paging=true&page=1' % (show_id, video_type, self._PER_PAGE) - while json_url is not None: - show_json = self._download_json( - json_url, show_id, - note='Downloading %s json page #%s' % - (video_type, re.search(r'[?&]page=([0-9]+)', json_url).group(1))) - for video in show_json['response']: + for video_type in ('episodes', 'clips'): + page_url = '%s/%s/%s.json?app=%s&per_page=%d&sort=number&direction=asc&with_paging=true&page=1' % (self._API_BASE, channel_id, video_type, self._APP, self._PER_PAGE) + while page_url: + page = self._download_json( + page_url, channel_id, + 'Downloading %s JSON page #%s' + % (video_type, re.search(r'[?&]page=([0-9]+)', page_url).group(1))) + for video in page['response']: video_id = video['id'] entries.append(self.url_result( 'http://www.viki.com/videos/%s' % video_id, 'Viki', video_id)) - json_url = show_json['pagination']['next'] + page_url = page['pagination']['next'] - return self.playlist_result(entries, show_id, title, description) + return self.playlist_result(entries, channel_id, title, description)