From d1ea5e171f817d94bc356a96e7eb6967919cc18d Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 11 Feb 2016 10:30:31 +0100 Subject: [PATCH] [plays] Add new extractor(#8458) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/plays.py | 50 ++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 youtube_dl/extractor/plays.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f08f27480..f1e5a5e86 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -534,6 +534,7 @@ from .planetaplay import PlanetaPlayIE from .pladform import PladformIE from .played import PlayedIE from .playfm import PlayFMIE +from .plays import PlaysTVIE from .playtvak import PlaytvakIE from .playvid import PlayvidIE from .playwire import PlaywireIE diff --git a/youtube_dl/extractor/plays.py b/youtube_dl/extractor/plays.py new file mode 100644 index 000000000..2aba7cb9c --- /dev/null +++ b/youtube_dl/extractor/plays.py @@ -0,0 +1,50 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import int_or_none + + +class PlaysTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P[0-9a-f]{18})' + _TEST = { + 'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall', + 'md5': 'dfeac1198506652b5257a62762cec7bc', + 'info_dict': { + 'id': '56af17f56c95335490', + 'ext': 'mp4', + 'title': 'When you outplay the Azir wall', + 'description': 'Posted by Bjergsen', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._og_search_title(webpage) + content = self._parse_json( + self._search_regex(r'R\.bindContent\(({.+?})\);', webpage, + 'content'), video_id)['content'] + mpd_url, sources = re.search( + r'(?s)]+data-mpd="([^"]+)"[^>]*>(.+?)', + content).groups() + formats = self._extract_mpd_formats( + self._proto_relative_url(mpd_url), video_id, mpd_id='DASH') + for format_id, height, format_url in re.findall(r'