From 4c64d2b357baf595d54a2bc6bb0ccfa8ed98acd4 Mon Sep 17 00:00:00 2001 From: TrellixVulnTeam Date: Tue, 15 Nov 2022 23:43:46 +0000 Subject: [PATCH] Adding tarfile member sanitization to extractall() --- ...ee8ca65d2c527-<_prepare_model_data>-bug.py | 24 ++++++++++++++++++- ...ee8ca65d2c527-<_prepare_model_data>-fix.py | 24 ++++++++++++++++++- ...ee8ca65d2c527-<_prepare_model_data>-bug.py | 24 ++++++++++++++++++- ...ee8ca65d2c527-<_prepare_model_data>-fix.py | 24 ++++++++++++++++++- ...ec6233a9333b9a9319ba0b26b73f36217-0-bug.py | 21 +++++++++++++++- ...ec6233a9333b9a9319ba0b26b73f36217-0-fix.py | 21 +++++++++++++++- 6 files changed, 132 insertions(+), 6 deletions(-) diff --git a/Data Set/bug-fixing-2/948d58ecfdda53b8b065749624cee8ca65d2c527-<_prepare_model_data>-bug.py b/Data Set/bug-fixing-2/948d58ecfdda53b8b065749624cee8ca65d2c527-<_prepare_model_data>-bug.py index d6e305c4..f9b75587 100644 --- a/Data Set/bug-fixing-2/948d58ecfdda53b8b065749624cee8ca65d2c527-<_prepare_model_data>-bug.py +++ b/Data Set/bug-fixing-2/948d58ecfdda53b8b065749624cee8ca65d2c527-<_prepare_model_data>-bug.py @@ -14,7 +14,29 @@ def _prepare_model_data(self, model_test): urlretrieve(url, download_file.name) print('Done') with tarfile.open(download_file.name) as t: - t.extractall(models_dir) + + import os + + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(t, models_dir) finally: print('Failed to prepare data for model {}'.format(model_test.model_name)) os.remove(download_file.name) diff --git a/Data Set/bug-fixing-2/948d58ecfdda53b8b065749624cee8ca65d2c527-<_prepare_model_data>-fix.py b/Data Set/bug-fixing-2/948d58ecfdda53b8b065749624cee8ca65d2c527-<_prepare_model_data>-fix.py index 3455b382..91b64e07 100644 --- a/Data Set/bug-fixing-2/948d58ecfdda53b8b065749624cee8ca65d2c527-<_prepare_model_data>-fix.py +++ b/Data Set/bug-fixing-2/948d58ecfdda53b8b065749624cee8ca65d2c527-<_prepare_model_data>-fix.py @@ -14,7 +14,29 @@ def _prepare_model_data(self, model_test): urlretrieve(url, download_file.name) print('Done') with tarfile.open(download_file.name) as t: - t.extractall(models_dir) + + import os + + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(t, models_dir) except Exception as e: print('Failed to prepare data for model {}: {}'.format(model_test.model_name, e)) raise diff --git a/Data Set/bug-fixing-5/948d58ecfdda53b8b065749624cee8ca65d2c527-<_prepare_model_data>-bug.py b/Data Set/bug-fixing-5/948d58ecfdda53b8b065749624cee8ca65d2c527-<_prepare_model_data>-bug.py index bb03f94b..7c159099 100644 --- a/Data Set/bug-fixing-5/948d58ecfdda53b8b065749624cee8ca65d2c527-<_prepare_model_data>-bug.py +++ b/Data Set/bug-fixing-5/948d58ecfdda53b8b065749624cee8ca65d2c527-<_prepare_model_data>-bug.py @@ -12,7 +12,29 @@ def _prepare_model_data(self, model_test): urlretrieve(url, download_file.name) print('Done') with tarfile.open(download_file.name) as t: - t.extractall(models_dir) + + import os + + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(t, models_dir) finally: print('Failed to prepare data for model {}'.format(model_test.model_name)) os.remove(download_file.name) diff --git a/Data Set/bug-fixing-5/948d58ecfdda53b8b065749624cee8ca65d2c527-<_prepare_model_data>-fix.py b/Data Set/bug-fixing-5/948d58ecfdda53b8b065749624cee8ca65d2c527-<_prepare_model_data>-fix.py index 608fd61f..933bbc31 100644 --- a/Data Set/bug-fixing-5/948d58ecfdda53b8b065749624cee8ca65d2c527-<_prepare_model_data>-fix.py +++ b/Data Set/bug-fixing-5/948d58ecfdda53b8b065749624cee8ca65d2c527-<_prepare_model_data>-fix.py @@ -12,7 +12,29 @@ def _prepare_model_data(self, model_test): urlretrieve(url, download_file.name) print('Done') with tarfile.open(download_file.name) as t: - t.extractall(models_dir) + + import os + + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(t, models_dir) except Exception as e: print('Failed to prepare data for model {}: {}'.format(model_test.model_name, e)) raise diff --git a/Test Set/Open Source Projects/tensorlayer/fcb8a44ec6233a9333b9a9319ba0b26b73f36217-0-bug.py b/Test Set/Open Source Projects/tensorlayer/fcb8a44ec6233a9333b9a9319ba0b26b73f36217-0-bug.py index 9d386f13..83dcf6ff 100755 --- a/Test Set/Open Source Projects/tensorlayer/fcb8a44ec6233a9333b9a9319ba0b26b73f36217-0-bug.py +++ b/Test Set/Open Source Projects/tensorlayer/fcb8a44ec6233a9333b9a9319ba0b26b73f36217-0-bug.py @@ -576,7 +576,26 @@ def get_wmt_enfr_train_set(directory): _WMT_ENFR_TRAIN_URL) print("Extracting tar file %s" % corpus_file) with tarfile.open(corpus_file, "r") as corpus_tar: - corpus_tar.extractall(directory) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(corpus_tar, directory) gunzip_file(train_path + ".fr.gz", train_path + ".fr") gunzip_file(train_path + ".en.gz", train_path + ".en") return train_path diff --git a/Test Set/Open Source Projects/tensorlayer/fcb8a44ec6233a9333b9a9319ba0b26b73f36217-0-fix.py b/Test Set/Open Source Projects/tensorlayer/fcb8a44ec6233a9333b9a9319ba0b26b73f36217-0-fix.py index 0b10a055..50cfd1fe 100755 --- a/Test Set/Open Source Projects/tensorlayer/fcb8a44ec6233a9333b9a9319ba0b26b73f36217-0-fix.py +++ b/Test Set/Open Source Projects/tensorlayer/fcb8a44ec6233a9333b9a9319ba0b26b73f36217-0-fix.py @@ -626,7 +626,26 @@ def get_wmt_enfr_train_set(path): _WMT_ENFR_TRAIN_URL) print("Extracting tar file %s" % corpus_file) with tarfile.open(corpus_file, "r") as corpus_tar: - corpus_tar.extractall(path) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(corpus_tar, path) gunzip_file(train_path + ".fr.gz", train_path + ".fr") gunzip_file(train_path + ".en.gz", train_path + ".en") return train_path