From 1fe8fb8c2022b70e6ed44d9c80570239eec74728 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Thu, 6 Nov 2014 21:44:07 +0100
Subject: [PATCH] [vice] Re-add extractor (fixes #4120)

The generic extraction no longer works.
---
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/vice.py     | 44 +++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)
 create mode 100644 youtube_dl/extractor/vice.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 3f85c99cd..3c1807f15 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -421,6 +421,7 @@ from .vesti import VestiIE
 from .vevo import VevoIE
 from .vgtv import VGTVIE
 from .vh1 import VH1IE
+from .vice import ViceIE
 from .viddler import ViddlerIE
 from .videobam import VideoBamIE
 from .videodetective import VideoDetectiveIE
diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py
new file mode 100644
index 000000000..f11ca8217
--- /dev/null
+++ b/youtube_dl/extractor/vice.py
@@ -0,0 +1,44 @@
+from __future__ import unicode_literals
+import re
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+from ..utils import ExtractorError
+
+
+class ViceIE(InfoExtractor):
+    """Resolve vice.com pages to the Ooyala video embedded in them."""
+
+    _VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'
+
+    _TEST = {
+        'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
+        'info_dict': {
+            'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
+            'ext': 'mp4',
+            'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
+        },
+        'params': {
+            # Requires ffmpeg (m3u8 manifest)
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return a url_result pointing at the page's Ooyala embed.
+
+        Raises an expected ExtractorError when the page has no embed code.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        name = mobj.group('name')
+        webpage = self._download_webpage(url, name)
+        try:
+            # Only the lookup is guarded: a missing embed code is the one
+            # failure that means "this page has no video".
+            embed_code = self._search_regex(
+                r'embedCode=([^&\'"]+)', webpage,
+                'ooyala embed code')
+        except ExtractorError:
+            raise ExtractorError('The page doesn\'t contain a video', expected=True)
+        ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
+        return self.url_result(ooyala_url, ie='Ooyala')