From e81a47460365738a0add4d4da52a712c0091704f Mon Sep 17 00:00:00 2001 From: snipem Date: Fri, 3 Apr 2015 15:34:49 +0200 Subject: [PATCH 1/3] [Gamersyde] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/gamersyde.py | 64 +++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 youtube_dl/extractor/gamersyde.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index aae4aae4c..2935d5b33 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -177,6 +177,7 @@ from .gameone import ( GameOneIE, GameOnePlaylistIE, ) +from .gamersyde import GamersydeIE from .gamespot import GameSpotIE from .gamestar import GameStarIE from .gametrailers import GametrailersIE diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py new file mode 100644 index 000000000..c40106216 --- /dev/null +++ b/youtube_dl/extractor/gamersyde.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals +import re +import json +import time +from .common import InfoExtractor + + +class GamersydeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_' + _TEST = { + 'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html', + 'md5': 'f38d400d32f19724570040d5ce3a505f', + 'info_dict': { + 'id': '34371', + 'ext': 'mp4', + 'title': 'Bloodborne - Birth of a hero', + 'thumbnail': 're:^https?://.*\.jpg$', + } + } + + def _calculateDuration(self, durationString): + duration = time.strptime(durationString, "%M minutes %S seconds") + return duration.tm_min * 60 + duration.tm_sec + + def _fixJsonSyntax(self, json): + + json = re.sub(r"{\s*(\w)", r'{"\1', json) + json = re.sub(r",\s*(\w)", r',"\1', json) + json = re.sub(r"(\w): ", r'\1":', json) + json = re.sub(r",\s*}", "}", json, flags=re.DOTALL) + json = re.sub(r",\s*]", "]", json, flags=re.DOTALL) + + return json + + def _real_extract(self, url): + + 
video_id = self._search_regex(r'-(.*?)_[a-z]{2}.html$', url, 'video_id') + webpage = self._download_webpage(url, video_id) + + filesJson = self._search_regex(r'playlist: (.*?)\}\);', webpage, 'files', flags=re.DOTALL) + filesJson = self._fixJsonSyntax(filesJson) + + data = json.loads(filesJson) + playlist = data[0] + + formats = [] + + title = re.sub(r"[0-9]+ - ", "", playlist['title']) + + for playlistEntry in playlist['sources']: + format = { + 'url': playlistEntry['file'], + 'format_id': playlistEntry['label'] + } + + formats.append(format) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': playlist['image'] + } From 115c281672bd7479f87c48249f6a0186ac7d19cc Mon Sep 17 00:00:00 2001 From: snipem Date: Sat, 4 Apr 2015 12:31:48 +0200 Subject: [PATCH 2/3] [Gamersyde] Improved robustness, added duration and tests Fix for Json syntax is now less error prone for Json syntax inside of values. Extractor is now also using native Json handling. Added tests for several videos that were producing errors in the first place. 
--- youtube_dl/extractor/gamersyde.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py index c40106216..5c68a6891 100644 --- a/youtube_dl/extractor/gamersyde.py +++ b/youtube_dl/extractor/gamersyde.py @@ -8,7 +8,6 @@ from .common import InfoExtractor class GamersydeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_' - _TEST = { 'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html', 'md5': 'f38d400d32f19724570040d5ce3a505f', 'info_dict': { @@ -17,6 +16,11 @@ class GamersydeIE(InfoExtractor): 'title': 'Bloodborne - Birth of a hero', 'thumbnail': 're:^https?://.*\.jpg$', } + }, + { + 'url': 'http://www.gamersyde.com/hqstream_dark_souls_ii_scholar_of_the_first_sin_gameplay_part_1-34417_en.html', + 'info_dict': { + 'ext': 'mp4', } def _calculateDuration(self, durationString): @@ -27,7 +31,6 @@ class GamersydeIE(InfoExtractor): json = re.sub(r"{\s*(\w)", r'{"\1', json) json = re.sub(r",\s*(\w)", r',"\1', json) - json = re.sub(r"(\w): ", r'\1":', json) json = re.sub(r",\s*}", "}", json, flags=re.DOTALL) json = re.sub(r",\s*]", "]", json, flags=re.DOTALL) @@ -40,7 +43,6 @@ class GamersydeIE(InfoExtractor): filesJson = self._search_regex(r'playlist: (.*?)\}\);', webpage, 'files', flags=re.DOTALL) filesJson = self._fixJsonSyntax(filesJson) - data = json.loads(filesJson) playlist = data[0] From 3d24d997ae1f92686aa7edd0bfeed28353fbfb2e Mon Sep 17 00:00:00 2001 From: snipem Date: Sat, 4 Apr 2015 12:42:14 +0200 Subject: [PATCH 3/3] Fixed indentation of test cases Led to error on Linux machine --- youtube_dl/extractor/gamersyde.py | 45 ++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py index 5c68a6891..cc6fa4037 100644 --- a/youtube_dl/extractor/gamersyde.py +++ b/youtube_dl/extractor/gamersyde.py @@ -1,39 +1,62 @@ # 
coding: utf-8 from __future__ import unicode_literals import re -import json import time + from .common import InfoExtractor class GamersydeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_' + _TESTS = [{ 'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html', 'md5': 'f38d400d32f19724570040d5ce3a505f', 'info_dict': { 'id': '34371', 'ext': 'mp4', + 'duration': 372, 'title': 'Bloodborne - Birth of a hero', 'thumbnail': 're:^https?://.*\.jpg$', } - }, - { + }, { 'url': 'http://www.gamersyde.com/hqstream_dark_souls_ii_scholar_of_the_first_sin_gameplay_part_1-34417_en.html', + 'md5': '94bd7c3feff3275576cf5cb6c8a3a720', + 'info_dict': { + 'id': '34417', + 'ext': 'mp4', + 'duration': 270, + 'title': 'Dark Souls II: Scholar of the First Sin - Gameplay - Part 1', + 'thumbnail': 're:^https?://.*\.jpg$', + } + }, { + 'url': 'http://www.gamersyde.com/hqstream_grand_theft_auto_v_heists_trailer-33786_en.html', + 'md5': '65e442f5f340d571ece8c80d50700369', 'info_dict': { + 'id': '33786', 'ext': 'mp4', + 'duration': 59, + 'title': 'Grand Theft Auto V - Heists Trailer', + 'thumbnail': 're:^https?://.*\.jpg$', + } } + ] def _calculateDuration(self, durationString): - duration = time.strptime(durationString, "%M minutes %S seconds") + if (durationString.find("minutes") > -1): + duration = time.strptime(durationString, "%M minutes %S seconds") + else: + duration = time.strptime(durationString, "%S seconds") return duration.tm_min * 60 + duration.tm_sec def _fixJsonSyntax(self, json): - json = re.sub(r"{\s*(\w)", r'{"\1', json) - json = re.sub(r",\s*(\w)", r',"\1', json) json = re.sub(r",\s*}", "}", json, flags=re.DOTALL) json = re.sub(r",\s*]", "]", json, flags=re.DOTALL) - + json = json.replace('file: "', '"file": "') + json = json.replace('title: "', '"title": "') + json = json.replace('label: "', '"label": "') + json = json.replace('image: "', '"image": "') + json = json.replace('sources: [', '"sources": [') return json def 
_real_extract(self, url): @@ -42,13 +65,16 @@ class GamersydeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) filesJson = self._search_regex(r'playlist: (.*?)\}\);', webpage, 'files', flags=re.DOTALL) - filesJson = self._fixJsonSyntax(filesJson) - data = json.loads(filesJson) + data = self._parse_json(filesJson,video_id, transform_source=self._fixJsonSyntax) + playlist = data[0] formats = [] title = re.sub(r"[0-9]+ - ", "", playlist['title']) + + length = self._search_regex(r'(([0-9]{1,2} minutes ){0,1}[0-9]{1,2} seconds)', webpage, 'length') + duration = self._calculateDuration(length) for playlistEntry in playlist['sources']: format = { @@ -62,5 +88,6 @@ class GamersydeIE(InfoExtractor): 'id': video_id, 'title': title, 'formats': formats, + 'duration': duration, 'thumbnail': playlist['image'] }