[YoutubeDL] urlopen: disable the 'file:' protocol (#8227)

If someone is running youtube-dl on a server to deliver files, a user could input 'file:///some/important/file' and youtube-dl would save that file as a video, giving the user access to sensitive information.
'file:' URLs can be filtered, but the user can still supply a URL to a crafted m3u8 manifest that references a 'file:' URL (such as file:///etc/passwd) as one of its entries.


With this patch, 'file:' URLs raise URLError, just as unknown protocols do.
Jaime Marquínez Ferrándiz 9 years ago
parent 40cf7fcbd2
commit e37afbe0b8

@ -12,7 +12,7 @@ import copy
from test.helper import FakeYDL, assertRegexpMatches
from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_str
from youtube_dl.compat import compat_str, compat_urllib_error
from youtube_dl.extractor import YoutubeIE
from youtube_dl.postprocessor.common import PostProcessor
from youtube_dl.utils import ExtractorError, match_filter_func
@ -631,6 +631,11 @@ class TestYoutubeDL(unittest.TestCase):
result = get_ids({'playlist_items': '10'})
self.assertEqual(result, [])
def test_urlopen_no_file_protocol(self):
    """Check that urlopen() raises URLError when given a 'file:' URL."""
    # Regression test for https://github.com/rg3/youtube-dl/issues/8227
    downloader = YDL()
    local_file_url = 'file:///etc/passwd'
    self.assertRaises(
        compat_urllib_error.URLError, downloader.urlopen, local_file_url)
if __name__ == '__main__':

@ -1986,8 +1986,14 @@ class YoutubeDL(object):
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
data_handler = compat_urllib_request_DataHandler()
opener = compat_urllib_request.build_opener(
proxy_handler, https_handler, cookie_processor, ydlh, data_handler)
unknown_handler = compat_urllib_request.UnknownHandler()
handlers = (proxy_handler, https_handler, cookie_processor, ydlh, data_handler, unknown_handler)
# we don't use build_opener because it automatically adds FileHandler,
# which can be used for malicious purposes (see
# https://github.com/rg3/youtube-dl/issues/8227)
opener = compat_urllib_request.OpenerDirector()
for handler in handlers:
# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play