From 796173d08b514182eedc704541eb55d5c9e1dc0d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 3 Dec 2012 15:36:24 +0100 Subject: [PATCH] Keep video IDs verbatim if possible (Closes #571) --- test/test_utils.py | 5 +++++ youtube_dl/FileDownloader.py | 7 +++++-- youtube_dl/utils.py | 20 +++++++++++--------- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index fd8190f51..8afc30370 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -79,6 +79,11 @@ class TestUtil(unittest.TestCase): self.assertTrue(sanitize_filename('-', restricted=True) != '') self.assertTrue(sanitize_filename(':', restricted=True) != '') + def test_sanitize_ids(self): + self.assertEquals(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw') + self.assertEquals(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw') + self.assertEquals(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI') + def test_ordered_set(self): self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) self.assertEqual(orderedSet([]), []) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 48c8eb126..c471cc160 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -334,8 +334,11 @@ class FileDownloader(object): template_dict['epoch'] = int(time.time()) template_dict['autonumber'] = u'%05d' % self._num_downloads - template_dict = dict((key, u'NA' if val is None else val) for key, val in template_dict.items()) - template_dict = dict((k, sanitize_filename(compat_str(v), self.params.get('restrictfilenames'))) for k,v in template_dict.items()) + sanitize = lambda k,v: sanitize_filename( + u'NA' if v is None else compat_str(v), + restricted=self.params.get('restrictfilenames'), + is_id=(k==u'id')) + template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items()) filename = self.params['outtmpl'] % template_dict return filename diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 7f73b8476..4dcf18991 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -317,9 +317,10 @@ def timeconvert(timestr): timestamp = email.utils.mktime_tz(timetuple) return timestamp -def sanitize_filename(s, restricted=False): +def sanitize_filename(s, restricted=False, is_id=False): """Sanitizes a string so it could be used as part of a filename. If restricted is set, use a stricter subset of allowed characters. + Set is_id if this is not an arbitrary string, but an ID that should be kept if possible """ def replace_insane(char): if char == '?' or ord(char) < 32 or ord(char) == 127: @@ -337,14 +338,15 @@ def sanitize_filename(s, restricted=False): return char result = u''.join(map(replace_insane, s)) - while '__' in result: - result = result.replace('__', '_') - result = result.strip('_') - # Common case of "Foreign band name - English song title" - if restricted and result.startswith('-_'): - result = result[2:] - if not result: - result = '_' + if not is_id: + while '__' in result: + result = result.replace('__', '_') + result = result.strip('_') + # Common case of "Foreign band name - English song title" + if restricted and result.startswith('-_'): + result = result[2:] + if not result: + result = '_' return result def orderedSet(iterable):