From 408b5839b186ffaea09a67e584b8da33525ed967 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Sun, 14 Dec 2014 17:59:25 +0200 Subject: [PATCH] [yesjapan] Add new extractor (Closes #4466) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/yesjapan.py | 62 ++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 youtube_dl/extractor/yesjapan.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3ae7a8a52..caf8c04f1 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -510,6 +510,7 @@ from .yahoo import ( YahooIE, YahooSearchIE, ) +from .yesjapan import YesJapanIE from .ynet import YnetIE from .youjizz import YouJizzIE from .youku import YoukuIE diff --git a/youtube_dl/extractor/yesjapan.py b/youtube_dl/extractor/yesjapan.py new file mode 100644 index 000000000..fec1ad1ac --- /dev/null +++ b/youtube_dl/extractor/yesjapan.py @@ -0,0 +1,62 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + HEADRequest, + get_element_by_attribute, + parse_iso8601, +) + + +class YesJapanIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?yesjapan\.com/video/(?P[A-Za-z0-9\-]*)_(?P[A-Za-z0-9]+)\.html' + _TEST = { + 'url': 'http://www.yesjapan.com/video/japanese-in-5-20-wa-and-ga-particle-usages_726497834.html', + 'md5': 'f0be416314e5be21a12b499b330c21cf', + 'info_dict': { + 'id': '726497834', + 'title': 'Japanese in 5! #20 - WA And GA Particle Usages', + 'description': 'This should clear up some issues most students of Japanese encounter with WA and GA....', + 'ext': 'mp4', + 'timestamp': 1416391590, + 'upload_date': '20141119', + 'thumbnail': 're:^https?://.*\.jpg$', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) + video_url = self._og_search_video_url(webpage) + description = self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail(webpage) + + timestamp = None + submit_info = get_element_by_attribute('class', 'pm-submit-data', webpage) + if submit_info: + timestamp = parse_iso8601(self._search_regex( + r'datetime="([^"]+)"', webpage, 'upload date', fatal=False, default=None)) + + # attempt to resolve the final URL in order to get a proper extension + redirect_req = HEADRequest(video_url) + req = self._request_webpage( + redirect_req, video_id, note='Resolving final URL', errnote='Could not resolve final URL', fatal=False) + if req: + video_url = req.geturl() + + formats = [{ + 'format_id': 'sd', + 'url': video_url, + }] + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': description, + 'timestamp': timestamp, + 'thumbnail': thumbnail, + }