diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d118daa68..76726305a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -440,6 +440,7 @@ from .vporn import VpornIE from .vube import VubeIE from .vuclip import VuClipIE from .vulture import VultureIE +from .walla import WallaIE from .washingtonpost import WashingtonPostIE from .wat import WatIE from .wayofthemaster import WayOfTheMasterIE diff --git a/youtube_dl/extractor/walla.py b/youtube_dl/extractor/walla.py new file mode 100644 index 000000000..e687c3af0 --- /dev/null +++ b/youtube_dl/extractor/walla.py @@ -0,0 +1,70 @@ +# coding: utf-8 +from __future__ import unicode_literals + + +import re + +from .common import InfoExtractor + + +class WallaIE(InfoExtractor): + _VALID_URL = r'http://vod\.walla\.co\.il/\w+/(?P\d+)' + _TEST = { + 'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one', + 'info_dict': { + 'id': '2642630', + 'ext': 'flv', + 'title': 'וואן דיירקשן: ההיסטריה', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + + config_url = 'http://video2.walla.co.il/?w=null/null/%s/@@/video/flv_pl' % video_id + + webpage = self._download_webpage(config_url, video_id, '') + + media_id = self._html_search_regex(r'(\d+)', webpage, video_id, 'extract media id') + + prefix = '0' if len(media_id) == 7 else '' + + series = '%s%s' % (prefix, media_id[0:2]) + session = media_id[2:5] + episode = media_id[5:7] + + title = self._html_search_regex(r'(.*)', webpage, video_id, 'title') + + default_quality = self._html_search_regex(r'', webpage, video_id, 0) + + quality = default_quality if default_quality else '40' + + media_path = '/%s/%s/%s' % (series, session, media_id) #self._html_search_regex(r'.*(.*)' % default_quality ,webpage, '', flags=re.DOTALL) + + playpath = 'mp4:media/%s/%s/%s-%s' % (series, session, media_id, quality) #self._html_search_regex(r'.*(.*)' % default_quality ,webpage, '', flags=re.DOTALL) + + subtitles = {} + + subtitle_url = self._html_search_regex(r'(.*).*', webpage, video_id, 0) + + print subtitle_url + + if subtitle_url: + subtitles_page = self._download_webpage(subtitle_url, video_id, '') + subtitles['heb'] = subtitles_page + + return { + 'id': video_id, + 'title': title, + 'url': 'rtmp://wafla.walla.co.il:1935/vod', + 'player_url': 'http://isc.walla.co.il/w9/swf/video_swf/vod/WallaMediaPlayerAvod.swf', + 'page_url': url, + 'app': "vod", + 'play_path': playpath, + 'tc_url': 'rtmp://wafla.walla.co.il:1935/vod', + 'rtmp_protocol': 'rtmp', + 'ext': 'flv', + 'subtitles': subtitles, + } \ No newline at end of file