[radiojavan] Simplify and extract upload date

9 years ago · 7cf97daf77
parent cce23e43a9
commit 7cf97daf77
1 changed file with 35 additions and 40 deletions
--- a/youtube_dl/extractor/radiojavan.py
+++ b/youtube_dl/extractor/radiojavan.py
@@ -1,12 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import(
-    parse_duration,
+    unified_strdate,
-    str_to_int
+    str_to_int,
 )
 class RadioJavanIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
    _TEST = {
@@ -16,56 +18,49 @@ class RadioJavanIE(InfoExtractor):
            'id': 'chaartaar-ashoobam',
            'ext': 'mp4',
            'title': 'Chaartaar - Ashoobam',
            'description': 'Chaartaar - Ashoobam',
            'thumbnail': 're:^https?://.*\.jpe?g$',
            'upload_date': '20150215',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        }
    }
    def _real_extract(self, url):
-        display_id = self._match_id(url)
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        urls = list()
        prefix = 'https://media.rdjavan.com/media/music_video/'
-        video_url_480 = self._search_regex(
+        webpage = self._download_webpage(url, video_id)
            r'RJ\.video480p = \'([^\']+)\'', webpage, '480 video url', fatal= False)
        video_url_720 = self._search_regex(
            r'RJ\.video720p = \'([^\']+)\'', webpage, '720 video url', fatal= False)
        video_url_1080 = self._search_regex(
            r'RJ\.video1080p = \'([^\']+)\'', webpage, '1080 video url', fatal= False)
-        if video_url_480:
+        formats = [{
-            urls.append({'url': prefix + video_url_480, 'format': '480p'})
+            'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path,
-        if video_url_720:
+            'format_id': '%sp' % height,
-            urls.append({'url': prefix + video_url_720, 'format': '720p'})
+            'height': height,
-        if video_url_1080:
+        } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
            urls.append({'url': prefix + video_url_1080, 'format': '1080p'})
        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        formats = [{
            'url': url['url'],
            'format': url['format']
        } for url in urls]
-        likes = self._search_regex(
+        upload_date = unified_strdate(self._search_regex(
-            r'<span class="rating">([\d,]+)\s*likes</span>', webpage, 'Likes Count', fatal=False )
+            r'class="date_added">Date added: ([^<]+)<',
-        likes = likes.replace(',', '')
+            webpage, 'upload date', fatal=False))
        dislikes = self._search_regex(
            r'<span class="rating">([\d,]+)\s*dislikes</span>', webpage, 'Dislikes Count', fatal=False )
        dislikes = dislikes.replace(',', '')
-        plays = self._search_regex(
+        view_count = str_to_int(self._search_regex(
-            r'views_publish[">\s]*<span[^>]+class="views">Plays: ([\d,]+)</span>', webpage, 'Play Count', fatal=False )
+            r'class="views">Plays: ([\d,]+)',
-        plays = plays.replace(',', '')
+            webpage, 'view count', fatal=False))
        like_count = str_to_int(self._search_regex(
            r'class="rating">([\d,]+) likes',
            webpage, 'like count', fatal=False))
        dislike_count = str_to_int(self._search_regex(
            r'class="rating">([\d,]+) dislikes',
            webpage, 'dislike count', fatal=False))
        return {
-            'formats': formats,
+            'id': video_id,
            'id': display_id,
            'title': title,
            'description': title, # no description provided in RadioJavan
            'thumbnail': thumbnail,
-            'like_count': str_to_int(likes),
+            'upload_date': upload_date,
-            'dislike_count': str_to_int(dislikes),
+            'view_count': view_count,
-            'viewCount': str_to_int(plays)
+            'like_count': like_count,
-        }
+            'dislike_count': dislike_count,
            'formats': formats,
        }