# Origin: git patch 3d9fae1ed2da722faed44d4f89143f05797ab4d9
# From: Philipp Hagemeister
# Date: Sat, 13 Sep 2014 07:07:39 +0200
# Subject: [PATCH] Add support for PornoXO
#
# The patch touches two files (2 files changed, 67 insertions(+)):
#   youtube_dl/extractor/__init__.py |  1 +
#   youtube_dl/extractor/pornoxo.py  | 66 ++++ (new file, reproduced below)
#
# The __init__.py hunk (@@ -267,6 +267,7 @@) registers the extractor by adding
#     from .pornoxo import PornoXOIE
# between the existing `.pornotube` and `.promptfile` imports.
#
# NOTE(review): the copy this module was reconstructed from had lost its line
# breaks and the angle-bracket fragments inside two regexes (the named groups
# in _VALID_URL and the `<title>` anchor in the title pattern).  `id` is
# confirmed by the `mobj.group('id')` call; the second group name
# `display_id` is an assumption -- confirm against upstream.
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    parse_duration,
    str_to_int,
)


class PornoXOIE(InfoExtractor):
    """Information extractor for pornoxo.com video pages."""

    # Group `id` is the numeric video id; the second group is the URL slug.
    _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html'
    _TEST = {
        'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html',
        'md5': '582f28ecbaa9e6e24cb90f50f524ce87',
        'info_dict': {
            'id': '7564',
            'ext': 'flv',
            'title': 'Striptease From Sexy Secretary!',
            'description': 'Striptease From Sexy Secretary!',
            'categories': list,  # NSFW
            # Raw string: `\.` is a regex escape, not a Python string escape.
            'thumbnail': r're:https?://.*\.jpg$',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Download the video page at *url* and scrape its metadata.

        The video id is taken from the URL; every other field is pulled out
        of the downloaded page with regular expressions.  The media URL and
        the title are mandatory lookups; description, thumbnail, view count
        and categories are requested with ``fatal=False`` and may be None.

        Returns a dict with the keys ``id``, ``url``, ``title``,
        ``description``, ``thumbnail``, ``categories``, ``view_count`` and
        ``age_limit`` (always 18).
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        # The media URL and thumbnail are read from the page's
        # 'file' / 'image' key-value pairs.
        video_url = self._html_search_regex(
            r'\'file\'\s*:\s*"([^"]+)"', webpage, 'video_url')

        # Anchor on the <title> tag so the capture cannot start inside
        # unrelated markup that merely precedes " - PornoXO".
        title = self._html_search_regex(
            r'<title>([^<]+)\s*-\s*PornoXO', webpage, 'title')

        # The description meta tag has the form
        # "<description> featuring <categories>"; split it at "featuring".
        description = self._html_search_regex(
            r'<meta name="description" content="([^"]+)\s*featuring',
            webpage, 'description', fatal=False)

        thumbnail = self._html_search_regex(
            r'\'image\'\s*:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)

        # NOTE(review): relies on str_to_int accepting None when the
        # non-fatal lookup finds nothing -- confirm against ..utils.
        view_count = str_to_int(self._html_search_regex(
            r'Views:\s*(\d+)', webpage, 'view count', fatal=False))

        categories_str = self._html_search_regex(
            r'<meta name="description" content=".*featuring\s*([^"]+)"',
            webpage, 'categories', fatal=False)
        if categories_str is None:
            categories = None
        else:
            # Strip each entry so a ", "-separated list does not yield
            # categories with stray leading/trailing whitespace.
            categories = [
                category.strip() for category in categories_str.split(',')]

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'categories': categories,
            'view_count': view_count,
            'age_limit': 18,
        }