From d79a0e233a329e543797478a2eeb377e469c0f3f Mon Sep 17 00:00:00 2001 From: Pierre Rudloff Date: Tue, 17 Sep 2013 22:13:40 +0200 Subject: [PATCH 1/5] Extractor for websurg.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/websurg.py | 67 ++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 youtube_dl/extractor/websurg.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 761575062..19ded18f1 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -109,6 +109,7 @@ from .videofyme import VideofyMeIE from .vimeo import VimeoIE, VimeoChannelIE from .vine import VineIE from .wat import WatIE +from .websurg import WeBSurgIE from .weibo import WeiboIE from .wimp import WimpIE from .worldstarhiphop import WorldStarHipHopIE diff --git a/youtube_dl/extractor/websurg.py b/youtube_dl/extractor/websurg.py new file mode 100644 index 000000000..953bc9831 --- /dev/null +++ b/youtube_dl/extractor/websurg.py @@ -0,0 +1,67 @@ +# coding: utf-8 + +import re + +from ..utils import ( + compat_urllib_request, + compat_urllib_parse +) + +from .common import InfoExtractor + +class WeBSurgIE(InfoExtractor): + IE_NAME = u'websurg.com' + _VALID_URL = r'http://.*?\.websurg\.com/MEDIA/\?noheader=1&doi=(.*)' + + _TEST = { + u'url': u'http://www.websurg.com/MEDIA/?noheader=1&doi=vd01en4012', + u'file': u'vd01en4012.mp4', + u'params': { + u'skip_download': True, + } + } + + _LOGIN_URL = 'http://www.websurg.com/inc/login/login_div.ajax.php?login=1' + + def _real_extract(self, url): + + login_form = { + 'username': self._downloader.params['username'], + 'password': self._downloader.params['password'], + 'Submit': 1 + } + + request = compat_urllib_request.Request( + self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) + request.add_header( + 'Content-Type', 'application/x-www-form-urlencoded;charset=utf-8') + login_results = compat_urllib_request.urlopen(request).info() + + sessid = re.match(r'PHPSESSID=(.*);', + login_results['Set-Cookie']).group(1) + request = compat_urllib_request.Request( + url, compat_urllib_parse.urlencode(login_form), + {'Cookie': 'PHPSESSID=' + sessid + ';'}) + webpage = compat_urllib_request.urlopen(request).read() + + video_id = re.match(self._VALID_URL, url).group(1) + + url_info = re.search(r'streamer="(.*?)" src="(.*?)"', webpage) + + if url_info is None: + self._downloader.report_warning( + u'Unable to log in: bad username/password') + return + + return {'id': video_id, + 'title' : re.search( + r'property="og:title" content="(.*?)" />' + , webpage).group(1), + 'description': re.search( + r'name="description" content="(.*?)" />', webpage).group(1), + 'ext' : 'mp4', + 'url' : url_info.group(1) + '/' + url_info.group(2), + 'thumbnail': re.search( + r'property="og:image" content="(.*?)" />', webpage + ).group(1) + } From cc6943e86aef74bef767be7f4027ab6122c95d55 Mon Sep 17 00:00:00 2001 From: Pierre Rudloff Date: Wed, 18 Sep 2013 00:07:04 +0200 Subject: [PATCH 2/5] Improvements --- youtube_dl/extractor/websurg.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/websurg.py b/youtube_dl/extractor/websurg.py index 953bc9831..efc8029af 100644 --- a/youtube_dl/extractor/websurg.py +++ b/youtube_dl/extractor/websurg.py @@ -23,7 +23,7 @@ class WeBSurgIE(InfoExtractor): _LOGIN_URL = 'http://www.websurg.com/inc/login/login_div.ajax.php?login=1' - def _real_extract(self, url): + def _real_initialize(self): login_form = { 'username': self._downloader.params['username'], @@ -35,14 +35,13 @@ class WeBSurgIE(InfoExtractor): self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) request.add_header( 'Content-Type', 'application/x-www-form-urlencoded;charset=utf-8') - login_results = compat_urllib_request.urlopen(request).info() + compat_urllib_request.urlopen(request).info() - sessid = re.match(r'PHPSESSID=(.*);', - login_results['Set-Cookie']).group(1) - request = compat_urllib_request.Request( - url, compat_urllib_parse.urlencode(login_form), - {'Cookie': 'PHPSESSID=' + sessid + ';'}) - webpage = compat_urllib_request.urlopen(request).read() + def _real_extract(self, url): + + request = compat_urllib_request.Request(url) + webpage = unicode( + compat_urllib_request.urlopen(request).read(), 'utf-8') video_id = re.match(self._VALID_URL, url).group(1) @@ -52,16 +51,10 @@ class WeBSurgIE(InfoExtractor): self._downloader.report_warning( u'Unable to log in: bad username/password') return - return {'id': video_id, - 'title' : re.search( - r'property="og:title" content="(.*?)" />' - , webpage).group(1), - 'description': re.search( - r'name="description" content="(.*?)" />', webpage).group(1), + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), 'ext' : 'mp4', 'url' : url_info.group(1) + '/' + url_info.group(2), - 'thumbnail': re.search( - r'property="og:image" content="(.*?)" />', webpage - ).group(1) + 'thumbnail': self._og_search_thumbnail(webpage) } From 5c1d63b73737bb23885ae6079e2004b5f084eb9c Mon Sep 17 00:00:00 2001 From: Pierre Rudloff Date: Fri, 4 Oct 2013 01:04:38 +0200 Subject: [PATCH 3/5] Changes suggested by @phihag --- youtube_dl/extractor/websurg.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/websurg.py b/youtube_dl/extractor/websurg.py index efc8029af..849334aa0 100644 --- a/youtube_dl/extractor/websurg.py +++ b/youtube_dl/extractor/websurg.py @@ -36,21 +36,21 @@ class WeBSurgIE(InfoExtractor): request.add_header( 'Content-Type', 'application/x-www-form-urlencoded;charset=utf-8') compat_urllib_request.urlopen(request).info() + request = compat_urllib_request.Request(self._LOGIN_URL) + webpage = compat_urllib_request.urlopen(request).read() - def _real_extract(self, url): - - request = compat_urllib_request.Request(url) - webpage = unicode( - compat_urllib_request.urlopen(request).read(), 'utf-8') + if webpage != 'OK': + self._downloader.report_error( + u'Unable to log in: bad username/password') + def _real_extract(self, url): video_id = re.match(self._VALID_URL, url).group(1) + request = compat_urllib_request.Request(url) + webpage = self._download_webpage(url, video_id) + url_info = re.search(r'streamer="(.*?)" src="(.*?)"', webpage) - if url_info is None: - self._downloader.report_warning( - u'Unable to log in: bad username/password') - return return {'id': video_id, 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), From b039775057abf6005ceef2819a746c9f3b671cd3 Mon Sep 17 00:00:00 2001 From: Pierre Rudloff Date: Fri, 4 Oct 2013 01:07:24 +0200 Subject: [PATCH 4/5] Unused variable --- youtube_dl/extractor/websurg.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/websurg.py b/youtube_dl/extractor/websurg.py index 849334aa0..96a1bb852 100644 --- a/youtube_dl/extractor/websurg.py +++ b/youtube_dl/extractor/websurg.py @@ -46,7 +46,6 @@ class WeBSurgIE(InfoExtractor): def _real_extract(self, url): video_id = re.match(self._VALID_URL, url).group(1) - request = compat_urllib_request.Request(url) webpage = self._download_webpage(url, video_id) url_info = re.search(r'streamer="(.*?)" src="(.*?)"', webpage) From 73b4fafd82256c66198b1670d1a6dccfaf5f782c Mon Sep 17 00:00:00 2001 From: Pierre Rudloff Date: Fri, 4 Oct 2013 01:12:42 +0200 Subject: [PATCH 5/5] Use self._download_webpage everywhere --- youtube_dl/extractor/websurg.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/websurg.py b/youtube_dl/extractor/websurg.py index 96a1bb852..7d335d444 100644 --- a/youtube_dl/extractor/websurg.py +++ b/youtube_dl/extractor/websurg.py @@ -36,8 +36,7 @@ class WeBSurgIE(InfoExtractor): request.add_header( 'Content-Type', 'application/x-www-form-urlencoded;charset=utf-8') compat_urllib_request.urlopen(request).info() - request = compat_urllib_request.Request(self._LOGIN_URL) - webpage = compat_urllib_request.urlopen(request).read() + webpage = self._download_webpage(self._LOGIN_URL, '', 'Logging in') if webpage != 'OK': self._downloader.report_error(