diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 3dff723b8..686988fe5 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1197,6 +1197,10 @@ class YoutubeDL(object): if res: res += ', ' res += format_bytes(fdict['filesize']) + elif fdict.get('filesize_approx') is not None: + if res: + res += ', ' + res += '~' + format_bytes(fdict['filesize_approx']) return res def list_formats(self, info_dict): diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e68657314..3213abacf 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -69,6 +69,7 @@ class InfoExtractor(object): * vcodec Name of the video codec in use * container Name of the container format * filesize The number of bytes, if known in advance + * filesize_approx An estimate for the number of bytes * player_url SWF Player URL (used for rtmpdump). * protocol The protocol that will be used for the actual download, lower-case. @@ -555,6 +556,7 @@ class InfoExtractor(object): f.get('abr') if f.get('abr') is not None else -1, audio_ext_preference, f.get('filesize') if f.get('filesize') is not None else -1, + f.get('filesize_approx') if f.get('filesize_approx') is not None else -1, f.get('format_id'), ) formats.sort(key=_formats_key) diff --git a/youtube_dl/extractor/snotr.py b/youtube_dl/extractor/snotr.py index f89e81bf3..e762ad8f6 100644 --- a/youtube_dl/extractor/snotr.py +++ b/youtube_dl/extractor/snotr.py @@ -4,49 +4,39 @@ from __future__ import unicode_literals import re from .common import InfoExtractor - from ..utils import ( - + float_or_none, str_to_int, - parse_iso8601, - - - + parse_duration, ) + class SnotrIE(InfoExtractor): _VALID_URL = r'http?://(?:www\.)?snotr\.com/video/(?P\d+)/([\w]+)' - _TESTS =[ { + _TESTS = [{ 'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks', 'info_dict': { 'id': '13708', 'ext': 'flv', 'title': 'Drone flying through fireworks!', 'duration': 247, - 'filesize':12320768 - } - }, - - - - { - + 'filesize_approx': 98566144, + } + }, { 'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10', 'info_dict': { 'id': '530', 'ext': 'flv', 'title': 'David Letteman - George W. Bush Top 10', 'duration': 126, - 'filesize': 1048576 - } - }] - + 'filesize_approx': 8912896, + } + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - # TODO more code goes here, for example ... webpage = self._download_webpage(url, video_id) title = self._og_search_title(webpage) @@ -54,20 +44,23 @@ class SnotrIE(InfoExtractor): video_url = "http://cdn.videos.snotr.com/%s.flv" % video_id - view_count = str_to_int(self._html_search_regex(r'

\nViews:\n([\d,\.]+)

',webpage,'view count')) + view_count = str_to_int(self._html_search_regex( + r'

\nViews:\n([\d,\.]+)

', + webpage, 'view count', fatal=False)) - duration = self._html_search_regex(r'

\nLength:\n(.*?)

',webpage,'duration') - duration = str_to_int(duration[:1])*60 + str_to_int(duration[2:4]) + duration = parse_duration(self._html_search_regex( + r'

\nLength:\n\s*([0-9:]+).*?

', + webpage, 'duration', fatal=False)) - file_size = self._html_search_regex(r'

\nFilesize:\n(.*?)

',webpage,'filesize') - file_size = str_to_int(re.match(r'\d+',file_size).group())*131072 + filesize_approx = float_or_none(self._html_search_regex( + r'

\nFilesize:\n\s*([0-9.]+)\s*megabyte

', + webpage, 'filesize', fatal=False), invscale=1024 * 1024) return { 'id': video_id, 'title': title, - 'url':video_url, - 'view_count':view_count, - 'duration':duration, - 'filesize':file_size - - } \ No newline at end of file + 'url': video_url, + 'view_count': view_count, + 'duration': duration, + 'filesize_approx': filesize_approx, + } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 919603c62..bf4d1112f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1193,13 +1193,6 @@ def format_bytes(bytes): return u'%.2f%s' % (converted, suffix) -def str_to_int(int_str): - if int_str is None: - return None - int_str = re.sub(r'[,\.]', u'', int_str) - return int(int_str) - - def get_term_width(): columns = os.environ.get('COLUMNS', None) if columns: @@ -1267,15 +1260,22 @@ class HEADRequest(compat_urllib_request.Request): return "HEAD" -def int_or_none(v, scale=1, default=None, get_attr=None): +def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): if get_attr: if v is not None: v = getattr(v, get_attr, None) - return default if v is None else (int(v) // scale) + return default if v is None else (int(v) * invscale // scale) + + +def str_to_int(int_str): + if int_str is None: + return None + int_str = re.sub(r'[,\.]', u'', int_str) + return int(int_str) -def float_or_none(v, scale=1, default=None): - return default if v is None else (float(v) / scale) +def float_or_none(v, scale=1, invscale=1, default=None): + return default if v is None else (float(v) * invscale / scale) def parse_duration(s):