[arte] Clean up format sorting mess

We now use our standard sorting facilities. As a side effect, it's finally possible to download German videos from French URLs and vice versa.
master
Philipp Hagemeister 10 years ago
parent 3b9f631c41
commit aff2f4f4f5

@ -8,10 +8,10 @@ from ..utils import (
ExtractorError, ExtractorError,
find_xpath_attr, find_xpath_attr,
unified_strdate, unified_strdate,
determine_ext,
get_element_by_id, get_element_by_id,
get_element_by_attribute, get_element_by_attribute,
int_or_none, int_or_none,
qualities,
) )
# There are different sources of video in arte.tv, the extraction process # There are different sources of video in arte.tv, the extraction process
@ -102,79 +102,54 @@ class ArteTVPlus7IE(InfoExtractor):
'upload_date': unified_strdate(upload_date_str), 'upload_date': unified_strdate(upload_date_str),
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
} }
qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ'])
all_formats = [] formats = []
for format_id, format_dict in player_info['VSR'].items(): for format_id, format_dict in player_info['VSR'].items():
fmt = dict(format_dict) f = dict(format_dict)
fmt['format_id'] = format_id versionCode = f.get('versionCode')
all_formats.append(fmt)
# Some formats use the m3u8 protocol langcode = {
all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats)) 'fr': 'F',
def _match_lang(f): 'de': 'A',
if f.get('versionCode') is None: }.get(lang, lang)
return True lang_rexs = [r'VO?%s' % langcode, r'VO?.-ST%s' % langcode]
# Return true if that format is in the language of the url lang_pref = (
if lang == 'fr': None if versionCode is None else (
l = 'F' 10 if any(re.match(r, versionCode) for r in lang_rexs)
elif lang == 'de': else -10))
l = 'A' source_pref = 0
else: if versionCode is not None:
l = lang # The original version with subtitles has lower relevance
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] if re.match(r'VO-ST(F|A)', versionCode):
return any(re.match(r, f['versionCode']) for r in regexes) source_pref -= 10
# Some formats may not be in the same language as the url # The version with sourds/mal subtitles has also lower relevance
# TODO: Might want not to drop videos that does not match requested language elif re.match(r'VO?(F|A)-STM\1', versionCode):
# but to process those formats with lower precedence source_pref -= 9
formats = filter(_match_lang, all_formats) format = {
formats = list(formats) # in python3 filter returns an iterator 'format_id': format_id,
if not formats: 'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
# Some videos are only available in the 'Originalversion' 'language_preference': lang_pref,
# they aren't tagged as being in French or German 'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
# Sometimes there are neither videos of requested lang code 'width': int_or_none(f.get('width')),
# nor original version videos available 'height': int_or_none(f.get('height')),
# For such cases we just take all_formats as is 'tbr': int_or_none(f.get('bitrate')),
formats = all_formats 'quality': qfunc(f['quality']),
if not formats: 'source_preference': source_pref,
raise ExtractorError('The formats list is empty')
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
def sort_key(f):
return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
else:
def sort_key(f):
versionCode = f.get('versionCode')
if versionCode is None:
versionCode = ''
return (
# Sort first by quality
int(f.get('height', -1)),
int(f.get('bitrate', -1)),
# The original version with subtitles has lower relevance
re.match(r'VO-ST(F|A)', versionCode) is None,
# The version with sourds/mal subtitles has also lower relevance
re.match(r'VO?(F|A)-STM\1', versionCode) is None,
# Prefer http downloads over m3u8
0 if f['url'].endswith('m3u8') else 1,
)
formats = sorted(formats, key=sort_key)
def _format(format_info):
info = {
'format_id': format_info['format_id'],
'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')),
'width': int_or_none(format_info.get('width')),
'height': int_or_none(format_info.get('height')),
'tbr': int_or_none(format_info.get('bitrate')),
} }
if format_info['mediaType'] == 'rtmp':
info['url'] = format_info['streamer'] if f.get('mediaType') == 'rtmp':
info['play_path'] = 'mp4:' + format_info['url'] format['url'] = f['streamer']
info['ext'] = 'flv' format['play_path'] = 'mp4:' + f['url']
format['ext'] = 'flv'
else: else:
info['url'] = format_info['url'] format['url'] = f['url']
info['ext'] = determine_ext(info['url'])
return info formats.append(format)
info_dict['formats'] = [_format(f) for f in formats]
self._sort_formats(formats)
info_dict['formats'] = formats
return info_dict return info_dict

@ -87,6 +87,11 @@ class InfoExtractor(object):
by this field, regardless of all other values. by this field, regardless of all other values.
-1 for default (order by other properties), -1 for default (order by other properties),
-2 or smaller for less than default. -2 or smaller for less than default.
* language_preference Is this in the correct requested
language?
10 if it's what the URL is about,
-1 for default (don't know),
-10 otherwise, other values reserved for now.
* quality Order number of the video quality of this * quality Order number of the video quality of this
format, irrespective of the file format. format, irrespective of the file format.
-1 for default (order by other properties), -1 for default (order by other properties),
@ -615,6 +620,7 @@ class InfoExtractor(object):
return ( return (
preference, preference,
f.get('language_preference') if f.get('language_preference') is not None else -1,
f.get('quality') if f.get('quality') is not None else -1, f.get('quality') if f.get('quality') is not None else -1,
f.get('height') if f.get('height') is not None else -1, f.get('height') if f.get('height') is not None else -1,
f.get('width') if f.get('width') is not None else -1, f.get('width') if f.get('width') is not None else -1,

Loading…
Cancel
Save