Skip to content

Commit d42a222

Browse files
committed
[core,utils] Implement unsafe file extension mitigation
* from https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4, thx grub4k
1 parent 072a43e commit d42a222

File tree

3 files changed

+209
-43
lines changed

3 files changed

+209
-43
lines changed

Diff for: test/test_utils.py

+46
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,11 @@
1414
import io
1515
import itertools
1616
import json
17+
import types
1718
import xml.etree.ElementTree
1819

1920
from youtube_dl.utils import (
21+
_UnsafeExtensionError,
2022
age_restricted,
2123
args_to_str,
2224
base_url,
@@ -270,6 +272,27 @@ def env(var):
270272
expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
271273
'%s/expanded' % compat_getenv('HOME'))
272274

275+
_uncommon_extensions = [
276+
('exe', 'abc.exe.ext'),
277+
('de', 'abc.de.ext'),
278+
('../.mp4', None),
279+
('..\\.mp4', None),
280+
]
281+
282+
def assertUnsafeExtension(self, ext=None):
283+
assert_raises = self.assertRaises(_UnsafeExtensionError)
284+
assert_raises.ext = ext
285+
orig_exit = assert_raises.__exit__
286+
287+
def my_exit(self_, exc_type, exc_val, exc_tb):
288+
did_raise = orig_exit(exc_type, exc_val, exc_tb)
289+
if did_raise and assert_raises.ext is not None:
290+
self.assertEqual(assert_raises.ext, assert_raises.exception.extension, 'Unsafe extension not as unexpected')
291+
return did_raise
292+
293+
assert_raises.__exit__ = types.MethodType(my_exit, assert_raises)
294+
return assert_raises
295+
273296
def test_prepend_extension(self):
274297
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
275298
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
@@ -278,6 +301,19 @@ def test_prepend_extension(self):
278301
self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp')
279302
self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext')
280303

304+
# Test uncommon extensions
305+
self.assertEqual(prepend_extension('abc.ext', 'bin'), 'abc.bin.ext')
306+
for ext, result in self._uncommon_extensions:
307+
with self.assertUnsafeExtension(ext):
308+
prepend_extension('abc', ext)
309+
if result:
310+
self.assertEqual(prepend_extension('abc.ext', ext, 'ext'), result)
311+
else:
312+
with self.assertUnsafeExtension(ext):
313+
prepend_extension('abc.ext', ext, 'ext')
314+
with self.assertUnsafeExtension(ext):
315+
prepend_extension('abc.unexpected_ext', ext, 'ext')
316+
281317
def test_replace_extension(self):
282318
self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
283319
self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
@@ -286,6 +322,16 @@ def test_replace_extension(self):
286322
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
287323
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
288324

325+
# Test uncommon extensions
326+
self.assertEqual(replace_extension('abc.ext', 'bin'), 'abc.unknown_video')
327+
for ext, _ in self._uncommon_extensions:
328+
with self.assertUnsafeExtension(ext):
329+
replace_extension('abc', ext)
330+
with self.assertUnsafeExtension(ext):
331+
replace_extension('abc.ext', ext, 'ext')
332+
with self.assertUnsafeExtension(ext):
333+
replace_extension('abc.unexpected_ext', ext, 'ext')
334+
289335
def test_subtitles_filename(self):
290336
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
291337
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')

Diff for: youtube_dl/YoutubeDL.py

+17
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import copy
88
import datetime
99
import errno
10+
import functools
1011
import io
1112
import itertools
1213
import json
@@ -53,6 +54,7 @@
5354
compat_urllib_request_DataHandler,
5455
)
5556
from .utils import (
57+
_UnsafeExtensionError,
5658
age_restricted,
5759
args_to_str,
5860
bug_reports_message,
@@ -129,6 +131,20 @@
129131
import ctypes
130132

131133

134+
def _catch_unsafe_file_extension(func):
135+
@functools.wraps(func)
136+
def wrapper(self, *args, **kwargs):
137+
try:
138+
return func(self, *args, **kwargs)
139+
except _UnsafeExtensionError as error:
140+
self.report_error(
141+
'{0} found; to avoid damaging your system, this value is disallowed.'
142+
' If you believe this is an error{1}').format(
143+
error.message, bug_reports_message(','))
144+
145+
return wrapper
146+
147+
132148
class YoutubeDL(object):
133149
"""YoutubeDL class.
134150
@@ -1925,6 +1941,7 @@ def print_optional(field):
19251941
if self.params.get('forcejson', False):
19261942
self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
19271943

1944+
@_catch_unsafe_file_extension
19281945
def process_info(self, info_dict):
19291946
"""Process a single resolved IE result."""
19301947

Diff for: youtube_dl/utils.py

+146-43
Original file line numberDiff line numberDiff line change
@@ -1717,39 +1717,6 @@ def random_user_agent():
17171717
'PST': -8, 'PDT': -7 # Pacific
17181718
}
17191719

1720-
1721-
class Namespace(object):
1722-
"""Immutable namespace"""
1723-
1724-
def __init__(self, **kw_attr):
1725-
self.__dict__.update(kw_attr)
1726-
1727-
def __iter__(self):
1728-
return iter(self.__dict__.values())
1729-
1730-
@property
1731-
def items_(self):
1732-
return self.__dict__.items()
1733-
1734-
1735-
MEDIA_EXTENSIONS = Namespace(
1736-
common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
1737-
video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
1738-
common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
1739-
audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
1740-
thumbnails=('jpg', 'png', 'webp'),
1741-
# storyboards=('mhtml', ),
1742-
subtitles=('srt', 'vtt', 'ass', 'lrc', 'ttml'),
1743-
manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
1744-
)
1745-
MEDIA_EXTENSIONS.video = MEDIA_EXTENSIONS.common_video + MEDIA_EXTENSIONS.video
1746-
MEDIA_EXTENSIONS.audio = MEDIA_EXTENSIONS.common_audio + MEDIA_EXTENSIONS.audio
1747-
1748-
KNOWN_EXTENSIONS = (
1749-
MEDIA_EXTENSIONS.video + MEDIA_EXTENSIONS.audio
1750-
+ MEDIA_EXTENSIONS.manifests
1751-
)
1752-
17531720
# needed for sanitizing filenames in restricted mode
17541721
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
17551722
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
@@ -3977,19 +3944,22 @@ def parse_duration(s):
39773944
return duration
39783945

39793946

3980-
def prepend_extension(filename, ext, expected_real_ext=None):
3947+
def _change_extension(prepend, filename, ext, expected_real_ext=None):
39813948
name, real_ext = os.path.splitext(filename)
3982-
return (
3983-
'{0}.{1}{2}'.format(name, ext, real_ext)
3984-
if not expected_real_ext or real_ext[1:] == expected_real_ext
3985-
else '{0}.{1}'.format(filename, ext))
3949+
sanitize_extension = _UnsafeExtensionError.sanitize_extension
39863950

3951+
if not expected_real_ext or real_ext.partition('.')[0::2] == ('', expected_real_ext):
3952+
filename = name
3953+
if prepend and real_ext:
3954+
sanitize_extension(ext, prepend=prepend)
3955+
return ''.join((filename, '.', ext, real_ext))
39873956

3988-
def replace_extension(filename, ext, expected_real_ext=None):
3989-
name, real_ext = os.path.splitext(filename)
3990-
return '{0}.{1}'.format(
3991-
name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3992-
ext)
3957+
# Mitigate path traversal and file impersonation attacks
3958+
return '.'.join((filename, sanitize_extension(ext)))
3959+
3960+
3961+
prepend_extension = functools.partial(_change_extension, True)
3962+
replace_extension = functools.partial(_change_extension, False)
39933963

39943964

39953965
def check_executable(exe, args=[]):
@@ -6579,3 +6549,136 @@ def join_nonempty(*values, **kwargs):
65796549
if from_dict is not None:
65806550
values = (traverse_obj(from_dict, variadic(v)) for v in values)
65816551
return delim.join(map(compat_str, filter(None, values)))
6552+
6553+
6554+
class Namespace(object):
6555+
"""Immutable namespace"""
6556+
6557+
def __init__(self, **kw_attr):
6558+
self.__dict__.update(kw_attr)
6559+
6560+
def __iter__(self):
6561+
return iter(self.__dict__.values())
6562+
6563+
@property
6564+
def items_(self):
6565+
return self.__dict__.items()
6566+
6567+
6568+
MEDIA_EXTENSIONS = Namespace(
6569+
common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
6570+
video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
6571+
common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
6572+
audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
6573+
thumbnails=('jpg', 'png', 'webp'),
6574+
# storyboards=('mhtml', ),
6575+
subtitles=('srt', 'vtt', 'ass', 'lrc', 'ttml'),
6576+
manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
6577+
)
6578+
MEDIA_EXTENSIONS.video = MEDIA_EXTENSIONS.common_video + MEDIA_EXTENSIONS.video
6579+
MEDIA_EXTENSIONS.audio = MEDIA_EXTENSIONS.common_audio + MEDIA_EXTENSIONS.audio
6580+
6581+
KNOWN_EXTENSIONS = (
6582+
MEDIA_EXTENSIONS.video + MEDIA_EXTENSIONS.audio
6583+
+ MEDIA_EXTENSIONS.manifests
6584+
)
6585+
6586+
6587+
class _UnsafeExtensionError(Exception):
6588+
"""
6589+
Mitigation exception for unwanted file overwrite/path traversal
6590+
This should be caught in YoutubeDL.py with a warning
6591+
6592+
Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j
6593+
"""
6594+
_ALLOWED_EXTENSIONS = frozenset(itertools.chain(
6595+
( # internal
6596+
'description',
6597+
'json',
6598+
'meta',
6599+
'orig',
6600+
'part',
6601+
'temp',
6602+
'uncut',
6603+
'unknown_video',
6604+
'ytdl',
6605+
),
6606+
# video
6607+
MEDIA_EXTENSIONS.video, (
6608+
'avif',
6609+
'ismv',
6610+
'm2ts',
6611+
'm4s',
6612+
'mng',
6613+
'mpeg',
6614+
'qt',
6615+
'swf',
6616+
'ts',
6617+
'vp9',
6618+
'wvm',
6619+
),
6620+
# audio
6621+
MEDIA_EXTENSIONS.audio, (
6622+
'isma',
6623+
'mid',
6624+
'mpga',
6625+
'ra',
6626+
),
6627+
# image
6628+
MEDIA_EXTENSIONS.thumbnails, (
6629+
'bmp',
6630+
'gif',
6631+
'ico',
6632+
'heic',
6633+
'jng',
6634+
'jpeg',
6635+
'jxl',
6636+
'svg',
6637+
'tif',
6638+
'wbmp',
6639+
),
6640+
# subtitle
6641+
MEDIA_EXTENSIONS.subtitles, (
6642+
'dfxp',
6643+
'fs',
6644+
'ismt',
6645+
'sami',
6646+
'scc',
6647+
'ssa',
6648+
'tt',
6649+
),
6650+
# others
6651+
MEDIA_EXTENSIONS.manifests,
6652+
(
6653+
# not used in yt-dl
6654+
# *MEDIA_EXTENSIONS.storyboards,
6655+
# 'desktop',
6656+
# 'ism',
6657+
# 'm3u',
6658+
# 'sbv',
6659+
# 'swp',
6660+
# 'url',
6661+
# 'webloc',
6662+
# 'xml',
6663+
)))
6664+
6665+
def __init__(self, extension):
6666+
super(_UnsafeExtensionError, self).__init__('unsafe file extension: {0!r}'.format(extension))
6667+
self.extension = extension
6668+
6669+
@classmethod
6670+
def sanitize_extension(cls, extension, **kwargs):
6671+
# ... /, *, prepend=False
6672+
prepend = kwargs.get('prepend', False)
6673+
6674+
if '/' in extension or '\\' in extension:
6675+
raise cls(extension)
6676+
6677+
if not prepend:
6678+
last = extension.rpartition('.')[-1]
6679+
if last == 'bin':
6680+
extension = last = 'unknown_video'
6681+
if last.lower() not in cls._ALLOWED_EXTENSIONS:
6682+
raise cls(extension)
6683+
6684+
return extension

0 commit comments

Comments
 (0)