Skip to content

Commit 6dc2a1e

Browse files
h3rrrcoderabbitai[bot]KumoLiuericspodpre-commit-ci[bot]
authored andcommitted
Path traversal issue security fix (#8568)
The path traversal issue in the previously submitted vulnerability report has been fixed and has been verified and fixed locally. GHSA-x6ww-pf9m-m73m ### Description Added a security function to fix a security issue caused by directly using extractall in zip_file.extractall(output_dir). This has been verified locally and does not affect previous functionality. <img width="1605" height="371" alt="image" src="https://github.com/user-attachments/assets/2c854a6b-5786-48e4-909e-17015aeceb81" /> <img width="1661" height="300" alt="image" src="https://github.com/user-attachments/assets/3e9dda1a-f961-43dd-92e2-03780973adb1" /> Figure 1: Malicious zip file loaded, Figure 2: Normal zip file loaded ### Types of changes <!--- Put an `x` in all the boxes that apply, and remove the not applicable items --> - [x] Non-breaking change (fix or new feature that would not break existing functionality). - [ ] Breaking change (fix or new feature that would cause existing functionality to change). - [ ] New tests added to cover the changes. - [ ] Integration tests passed locally by running `./runtests.sh -f -u --net --coverage`. - [ ] Quick tests passed locally by running `./runtests.sh --quick --unittests --disttests`. - [ ] In-line docstrings updated. - [ ] Documentation updated, tested `make html` command in the `docs/` folder. --------- Signed-off-by: h3rrr <81402797+h3rrr@users.noreply.github.com> Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Co-authored-by: YunLiu <55491388+KumoLiu@users.noreply.github.com> Co-authored-by: Eric Kerfoot <17726042+ericspod@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent a9310ce commit 6dc2a1e

File tree

2 files changed

+243
-6
lines changed

2 files changed

+243
-6
lines changed

monai/apps/utils.py

Lines changed: 60 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,38 @@ def update_to(self, b: int = 1, bsize: int = 1, tsize: int | None = None) -> Non
122122
raise e
123123

124124

125+
def safe_extract_member(member, extract_to):
126+
"""Securely verify compressed package member paths to prevent path traversal attacks"""
127+
# Get member path (handle different compression formats)
128+
if hasattr(member, "filename"):
129+
member_path = member.filename # zipfile
130+
elif hasattr(member, "name"):
131+
member_path = member.name # tarfile
132+
else:
133+
member_path = str(member)
134+
135+
if hasattr(member, "issym") and member.issym():
136+
raise ValueError(f"Symbolic link detected in archive: {member_path}")
137+
if hasattr(member, "islnk") and member.islnk():
138+
raise ValueError(f"Hard link detected in archive: {member_path}")
139+
140+
member_path = os.path.normpath(member_path)
141+
142+
if os.path.isabs(member_path) or ".." in member_path.split(os.sep):
143+
raise ValueError(f"Unsafe path detected in archive: {member_path}")
144+
145+
full_path = os.path.join(extract_to, member_path)
146+
full_path = os.path.normpath(full_path)
147+
148+
extract_root = os.path.realpath(extract_to)
149+
target_real = os.path.realpath(full_path)
150+
# Ensure the resolved path stays within the extraction root
151+
if os.path.commonpath([extract_root, target_real]) != extract_root:
152+
raise ValueError(f"Unsafe path: path traversal {member_path}")
153+
154+
return full_path
155+
156+
125157
def check_hash(filepath: PathLike, val: str | None = None, hash_type: str = "md5") -> bool:
126158
"""
127159
Verify hash signature of specified file.
@@ -242,6 +274,32 @@ def download_url(
242274
)
243275

244276

277+
def _extract_zip(filepath, output_dir):
278+
with zipfile.ZipFile(filepath, "r") as zip_file:
279+
for member in zip_file.infolist():
280+
safe_path = safe_extract_member(member, output_dir)
281+
if member.is_dir():
282+
continue
283+
os.makedirs(os.path.dirname(safe_path), exist_ok=True)
284+
with zip_file.open(member) as source:
285+
with open(safe_path, "wb") as target:
286+
shutil.copyfileobj(source, target)
287+
288+
289+
def _extract_tar(filepath, output_dir):
290+
with tarfile.open(filepath, "r") as tar_file:
291+
for member in tar_file.getmembers():
292+
safe_path = safe_extract_member(member, output_dir)
293+
if not member.isfile():
294+
continue
295+
os.makedirs(os.path.dirname(safe_path), exist_ok=True)
296+
source = tar_file.extractfile(member)
297+
if source is not None:
298+
with source:
299+
with open(safe_path, "wb") as target:
300+
shutil.copyfileobj(source, target)
301+
302+
245303
def extractall(
246304
filepath: PathLike,
247305
output_dir: PathLike = ".",
@@ -287,14 +345,10 @@ def extractall(
287345
logger.info(f"Writing into directory: {output_dir}.")
288346
_file_type = file_type.lower().strip()
289347
if filepath.name.endswith("zip") or _file_type == "zip":
290-
zip_file = zipfile.ZipFile(filepath)
291-
zip_file.extractall(output_dir)
292-
zip_file.close()
348+
_extract_zip(filepath, output_dir)
293349
return
294350
if filepath.name.endswith("tar") or filepath.name.endswith("tar.gz") or "tar" in _file_type:
295-
tar_file = tarfile.open(filepath)
296-
tar_file.extractall(output_dir)
297-
tar_file.close()
351+
_extract_tar(filepath, output_dir)
298352
return
299353
raise NotImplementedError(
300354
f'Unsupported file type, available options are: ["zip", "tar.gz", "tar"]. name={filepath} type={file_type}.'

tests/apps/test_download_and_extract.py

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@
1111

1212
from __future__ import annotations
1313

14+
import tarfile
1415
import tempfile
1516
import unittest
17+
import zipfile
1618
from pathlib import Path
1719
from urllib.error import ContentTooShortError, HTTPError
1820

@@ -66,5 +68,186 @@ def test_default(self, key, file_type):
6668
)
6769

6870

71+
class TestPathTraversalProtection(unittest.TestCase):
72+
"""Test cases for path traversal attack protection in extractall function."""
73+
74+
def test_valid_zip_extraction(self):
75+
"""Test that valid zip files extract successfully without raising exceptions."""
76+
with tempfile.TemporaryDirectory() as tmp_dir:
77+
# Create a valid zip file
78+
zip_path = Path(tmp_dir) / "valid_test.zip"
79+
extract_dir = Path(tmp_dir) / "extract"
80+
extract_dir.mkdir()
81+
82+
# Create zip with normal file structure
83+
with zipfile.ZipFile(zip_path, "w") as zf:
84+
zf.writestr("normal_file.txt", "This is a normal file")
85+
zf.writestr("subdir/nested_file.txt", "This is a nested file")
86+
zf.writestr("another_file.json", '{"key": "value"}')
87+
88+
# This should not raise any exception
89+
try:
90+
extractall(str(zip_path), str(extract_dir))
91+
92+
# Verify files were extracted correctly
93+
self.assertTrue((extract_dir / "normal_file.txt").exists())
94+
self.assertTrue((extract_dir / "subdir" / "nested_file.txt").exists())
95+
self.assertTrue((extract_dir / "another_file.json").exists())
96+
97+
# Verify content
98+
with open(extract_dir / "normal_file.txt") as f:
99+
self.assertEqual(f.read(), "This is a normal file")
100+
101+
except Exception as e:
102+
self.fail(f"Valid zip extraction should not raise exception: {e}")
103+
104+
def test_malicious_zip_path_traversal(self):
105+
"""Test that malicious zip files with path traversal attempts raise ValueError."""
106+
with tempfile.TemporaryDirectory() as tmp_dir:
107+
# Create malicious zip file with path traversal
108+
zip_path = Path(tmp_dir) / "malicious_test.zip"
109+
extract_dir = Path(tmp_dir) / "extract"
110+
extract_dir.mkdir()
111+
112+
# Create zip with malicious paths
113+
with zipfile.ZipFile(zip_path, "w") as zf:
114+
# Try to write outside extraction directory
115+
zf.writestr("../../../etc/malicious.txt", "malicious content")
116+
zf.writestr("normal_file.txt", "normal content")
117+
118+
# This should raise ValueError due to path traversal detection
119+
with self.assertRaises(ValueError) as context:
120+
extractall(str(zip_path), str(extract_dir))
121+
122+
self.assertIn("unsafe path", str(context.exception).lower())
123+
124+
def test_valid_tar_extraction(self):
125+
"""Test that valid tar files extract successfully without raising exceptions."""
126+
with tempfile.TemporaryDirectory() as tmp_dir:
127+
# Create a valid tar file
128+
tar_path = Path(tmp_dir) / "valid_test.tar.gz"
129+
extract_dir = Path(tmp_dir) / "extract"
130+
extract_dir.mkdir()
131+
132+
# Create tar with normal file structure
133+
with tarfile.open(tar_path, "w:gz") as tf:
134+
# Create temporary files to add to tar
135+
temp_file1 = Path(tmp_dir) / "temp1.txt"
136+
temp_file1.write_text("This is a normal file")
137+
tf.add(temp_file1, arcname="normal_file.txt")
138+
139+
temp_file2 = Path(tmp_dir) / "temp2.txt"
140+
temp_file2.write_text("This is a nested file")
141+
tf.add(temp_file2, arcname="subdir/nested_file.txt")
142+
143+
# This should not raise any exception
144+
try:
145+
extractall(str(tar_path), str(extract_dir))
146+
147+
# Verify files were extracted correctly
148+
self.assertTrue((extract_dir / "normal_file.txt").exists())
149+
self.assertTrue((extract_dir / "subdir" / "nested_file.txt").exists())
150+
151+
# Verify content
152+
with open(extract_dir / "normal_file.txt") as f:
153+
self.assertEqual(f.read(), "This is a normal file")
154+
155+
except Exception as e:
156+
self.fail(f"Valid tar extraction should not raise exception: {e}")
157+
158+
def test_malicious_tar_path_traversal(self):
159+
"""Test that malicious tar files with path traversal attempts raise ValueError."""
160+
with tempfile.TemporaryDirectory() as tmp_dir:
161+
# Create malicious tar file with path traversal
162+
tar_path = Path(tmp_dir) / "malicious_test.tar.gz"
163+
extract_dir = Path(tmp_dir) / "extract"
164+
extract_dir.mkdir()
165+
166+
# Create tar with malicious paths
167+
with tarfile.open(tar_path, "w:gz") as tf:
168+
# Create a temporary file
169+
temp_file = Path(tmp_dir) / "temp.txt"
170+
temp_file.write_text("malicious content")
171+
172+
# Add with malicious path (trying to write outside extraction directory)
173+
tf.add(temp_file, arcname="../../../etc/malicious.txt")
174+
175+
# This should raise ValueError due to path traversal detection
176+
with self.assertRaises(ValueError) as context:
177+
extractall(str(tar_path), str(extract_dir))
178+
179+
self.assertIn("unsafe path", str(context.exception).lower())
180+
181+
def test_absolute_path_protection(self):
182+
"""Test protection against absolute paths in archives."""
183+
with tempfile.TemporaryDirectory() as tmp_dir:
184+
# Create zip with absolute path
185+
zip_path = Path(tmp_dir) / "absolute_path_test.zip"
186+
extract_dir = Path(tmp_dir) / "extract"
187+
extract_dir.mkdir()
188+
189+
with zipfile.ZipFile(zip_path, "w") as zf:
190+
# Try to use absolute path
191+
zf.writestr("/etc/passwd_bad", "malicious content")
192+
193+
# This should raise ValueError due to absolute path detection
194+
with self.assertRaises(ValueError) as context:
195+
extractall(str(zip_path), str(extract_dir))
196+
197+
self.assertIn("unsafe path", str(context.exception).lower())
198+
199+
def test_malicious_symlink_protection(self):
200+
"""Test protection against malicious symlinks in tar archives."""
201+
with tempfile.TemporaryDirectory() as tmp_dir:
202+
# Create malicious tar file with symlink
203+
tar_path = Path(tmp_dir) / "malicious_symlink_test.tar.gz"
204+
extract_dir = Path(tmp_dir) / "extract"
205+
extract_dir.mkdir()
206+
207+
# Create tar with malicious symlink
208+
with tarfile.open(tar_path, "w:gz") as tf:
209+
temp_file = Path(tmp_dir) / "normal.txt"
210+
temp_file.write_text("normal content")
211+
tf.add(temp_file, arcname="normal.txt")
212+
213+
symlink_info = tarfile.TarInfo(name="malicious_symlink.txt")
214+
symlink_info.type = tarfile.SYMTYPE
215+
symlink_info.linkname = "../../../etc/passwd_bad"
216+
symlink_info.size = 0
217+
tf.addfile(symlink_info)
218+
219+
with self.assertRaises(ValueError) as context:
220+
extractall(str(tar_path), str(extract_dir))
221+
222+
error_msg = str(context.exception).lower()
223+
self.assertTrue("unsafe path" in error_msg or "symlink" in error_msg)
224+
225+
def test_malicious_hardlink_protection(self):
226+
"""Test protection against malicious hard links in tar archives."""
227+
with tempfile.TemporaryDirectory() as tmp_dir:
228+
# Create malicious tar file with hard link
229+
tar_path = Path(tmp_dir) / "malicious_hardlink_test.tar.gz"
230+
extract_dir = Path(tmp_dir) / "extract"
231+
extract_dir.mkdir()
232+
233+
# Create tar with malicious hard link
234+
with tarfile.open(tar_path, "w:gz") as tf:
235+
temp_file = Path(tmp_dir) / "normal.txt"
236+
temp_file.write_text("normal content")
237+
tf.add(temp_file, arcname="normal.txt")
238+
239+
hardlink_info = tarfile.TarInfo(name="malicious_hardlink.txt")
240+
hardlink_info.type = tarfile.LNKTYPE
241+
hardlink_info.linkname = "/etc/passwd_bad"
242+
hardlink_info.size = 0
243+
tf.addfile(hardlink_info)
244+
245+
with self.assertRaises(ValueError) as context:
246+
extractall(str(tar_path), str(extract_dir))
247+
248+
error_msg = str(context.exception).lower()
249+
self.assertTrue("unsafe path" in error_msg or "hardlink" in error_msg)
250+
251+
69252
if __name__ == "__main__":
70253
unittest.main()

0 commit comments

Comments
 (0)