From a1ed5434f09b00adc2571b6a82920fddde6626d7 Mon Sep 17 00:00:00 2001 From: Daniel Johnson Date: Thu, 13 Jul 2023 10:26:10 -0600 Subject: [PATCH 01/18] initial perms commit for main pull --- dsi/core.py | 7 +++++-- dsi/drivers/filesystem.py | 18 +++++++++++++++--- dsi/drivers/parquet.py | 1 + dsi/drivers/permissions.py | 20 ++++++++++++++++++++ 4 files changed, 41 insertions(+), 5 deletions(-) create mode 100644 dsi/drivers/permissions.py diff --git a/dsi/core.py b/dsi/core.py index ddb4710e..ba93058e 100644 --- a/dsi/core.py +++ b/dsi/core.py @@ -47,6 +47,7 @@ def static_munge(prefix, implementations): for valid_function in valid_module_functions_flattened: self.active_modules[valid_function] = [] self.active_metadata = OrderedDict() + self.active_metadata_perms = OrderedDict() self.transload_lock = False def list_available_modules(self, mod_type): @@ -111,7 +112,7 @@ def add_external_python_module(self, mod_type, mod_name, mod_path): Afterwards, load_module can be used to load a DSI module from the added Python module. Note: mod_type is needed because each Python module should only implement plugins or drivers. 
- + For example, term = Terminal() term.add_external_python_module('plugin', 'my_python_file', '/the/path/to/my_python_file.py') @@ -171,7 +172,9 @@ def artifact_handler(self, interaction_type, **kwargs): collection=self.active_metadata, **kwargs) operation_success = True elif interaction_type == 'get': - self.active_metadata = obj.get_artifacts(**kwargs) + self.active_metadata.update(obj.get_artifacts(**kwargs)) + self.active_metadata_perms.update( + obj.get_artifacts_perms(), **kwargs) operation_success = True if interaction_type == 'inspect': for module_type, objs in selected_function_modules.items(): diff --git a/dsi/drivers/filesystem.py b/dsi/drivers/filesystem.py index a2bd161d..4c33261c 100644 --- a/dsi/drivers/filesystem.py +++ b/dsi/drivers/filesystem.py @@ -1,4 +1,8 @@ from abc import ABCMeta, abstractmethod +from stat import S_IRWXU, S_IRWXG, S_IRWXO +from os import stat + +from dsi.drivers.permissions import PermissionsManager class Driver(metaclass=ABCMeta): @@ -12,7 +16,7 @@ def git_commit_sha(self): pass @abstractmethod - def put_artifacts(self, artifacts, kwargs) -> None: + def put_artifacts(self, artifacts, **kwargs) -> None: pass @abstractmethod @@ -43,9 +47,9 @@ class Filesystem(Driver): EQ = "=" def __init__(self, filename) -> None: - pass + self.perms_manager = PermissionsManager() - def put_artifacts(self, artifacts, kwargs) -> None: + def put_artifacts(self, artifacts, **kwargs) -> None: pass def get_artifacts(self, query): @@ -53,3 +57,11 @@ def get_artifacts(self, query): def inspect_artifacts(self): pass + + def get_file_permissions(self, fpath: str) -> tuple[int, int, str]: + st = stat(fpath) + uid = st.st_uid + gid = st.st_gid + perm_mask = S_IRWXU | S_IRWXG | S_IRWXO + settings = oct(st.st_mode & perm_mask) + return (uid, gid, settings) diff --git a/dsi/drivers/parquet.py b/dsi/drivers/parquet.py index 71ab62c9..511ecfae 100644 --- a/dsi/drivers/parquet.py +++ b/dsi/drivers/parquet.py @@ -25,6 +25,7 @@ def get_artifacts(self): """Get 
Parquet data from filename.""" table = pq.read_table(self.filename) resout = table.to_pydict() + perms = {k, } return resout def put_artifacts(self, collection): diff --git a/dsi/drivers/permissions.py b/dsi/drivers/permissions.py new file mode 100644 index 00000000..da53d3cb --- /dev/null +++ b/dsi/drivers/permissions.py @@ -0,0 +1,20 @@ +from dataclasses import dataclass + + +@dataclass(eq=True, frozen=True) +class Permission: + uid: int + gid: int + settings: str + + +class PermissionsManager: + def __init__(self): + self.perms_collection = {} + + def get(self, uid: int, gid: int, settings: str) -> Permission: + if (uid, gid, settings) in self.perms_collection: + return self.perms_collection[(uid, gid, settings)] + perm = Permission(uid, gid, settings) + self.perms_collection[(uid, gid, settings)] = perm + return perm From 48d5fd6bd2f878aec6c7c3098c524031bf7f3edd Mon Sep 17 00:00:00 2001 From: Daniel Johnson Date: Tue, 18 Jul 2023 11:25:55 -0600 Subject: [PATCH 02/18] added permission tracking, not used yet --- dsi/core.py | 9 +-- dsi/data/dummy_data.pq | Bin 0 -> 1316 bytes dsi/drivers/filesystem.py | 20 ++---- dsi/drivers/parquet.py | 6 +- dsi/drivers/permissions.py | 20 ------ dsi/permissions/permissions.py | 78 ++++++++++++++++++++++ dsi/permissions/tests/test_permissions.py | 37 ++++++++++ dsi/plugins/env.py | 29 +++++--- dsi/plugins/metadata.py | 6 +- 9 files changed, 150 insertions(+), 55 deletions(-) create mode 100644 dsi/data/dummy_data.pq delete mode 100644 dsi/drivers/permissions.py create mode 100644 dsi/permissions/permissions.py create mode 100644 dsi/permissions/tests/test_permissions.py diff --git a/dsi/core.py b/dsi/core.py index 5fb1b561..2e759049 100644 --- a/dsi/core.py +++ b/dsi/core.py @@ -3,6 +3,8 @@ from collections import OrderedDict from itertools import product +from dsi.permissions.permissions import PermissionsManager + class Terminal(): """ @@ -47,7 +49,7 @@ def static_munge(prefix, implementations): for valid_function in 
valid_module_functions_flattened: self.active_modules[valid_function] = [] self.active_metadata = OrderedDict() - self.active_metadata_perms = OrderedDict() + self.perms_manager = PermissionsManager() self.transload_lock = False def list_available_modules(self, mod_type): @@ -95,7 +97,8 @@ def load_module(self, mod_type, mod_name, mod_function, **kwargs): try: this_module = import_module(python_module) class_ = getattr(this_module, class_name) - self.active_modules[mod_function].append(class_(**kwargs)) + instance = class_(perms_manager=self.perms_manager, **kwargs) + self.active_modules[mod_function].append(instance) load_success = True except AttributeError: continue @@ -190,8 +193,6 @@ def artifact_handler(self, interaction_type, **kwargs): operation_success = True elif interaction_type == 'get': self.active_metadata.update(obj.get_artifacts(**kwargs)) - self.active_metadata_perms.update( - obj.get_artifacts_perms(), **kwargs) operation_success = True if interaction_type == 'inspect': for module_type, objs in selected_function_modules.items(): diff --git a/dsi/data/dummy_data.pq b/dsi/data/dummy_data.pq new file mode 100644 index 0000000000000000000000000000000000000000..bc57a7ce984d4b38b4469a7e75564f39bc130738 GIT binary patch literal 1316 zcmcIk!A{#i5FKw^*OhvR5WBJ^A9BG$Do{W~iYhpa1GE(&XjFv4EhJWgpd>Ych#TT- zdaO8cjC$%n^k+J=&dOF2gg9hm&Ayr4dHZJ9_7T?Bq%DhbeqRa+a{z8&L244@oK#Dc zFG@!`f#;f;3@KQglBN20B|%p}Tc103 zrzgjRs`b%&L=JKLc&uJ$bM@Q2s`aIHiwECnX@$c`H$jVtob=T(8l<{+vi^*orY>NY zn&vxOFJE%s6i|JW`yH8 None: - self.perms_manager = PermissionsManager() + def __init__(self, filename: str, perms_manager: PermissionsManager) -> None: + self.filename = filename + self.perms_manager = perms_manager def put_artifacts(self, artifacts, **kwargs) -> None: pass @@ -57,11 +55,3 @@ def get_artifacts(self, query): def inspect_artifacts(self): pass - - def get_file_permissions(self, fpath: str) -> tuple[int, int, str]: - st = stat(fpath) - uid = st.st_uid - gid = st.st_gid - perm_mask = 
S_IRWXU | S_IRWXG | S_IRWXO - settings = oct(st.st_mode & perm_mask) - return (uid, gid, settings) diff --git a/dsi/drivers/parquet.py b/dsi/drivers/parquet.py index 511ecfae..164b5c57 100644 --- a/dsi/drivers/parquet.py +++ b/dsi/drivers/parquet.py @@ -14,8 +14,7 @@ class Parquet(Filesystem): """ def __init__(self, filename, **kwargs): - super().__init__(filename=filename) - self.filename = filename + super().__init__(filename=filename, **kwargs) try: self.compression = kwargs['compression'] except KeyError: @@ -25,7 +24,8 @@ def get_artifacts(self): """Get Parquet data from filename.""" table = pq.read_table(self.filename) resout = table.to_pydict() - perms = {k, } + self.perms_manager.register_columns_with_file( + list(resout.keys()), self.filename) return resout def put_artifacts(self, collection): diff --git a/dsi/drivers/permissions.py b/dsi/drivers/permissions.py deleted file mode 100644 index da53d3cb..00000000 --- a/dsi/drivers/permissions.py +++ /dev/null @@ -1,20 +0,0 @@ -from dataclasses import dataclass - - -@dataclass(eq=True, frozen=True) -class Permission: - uid: int - gid: int - settings: str - - -class PermissionsManager: - def __init__(self): - self.perms_collection = {} - - def get(self, uid: int, gid: int, settings: str) -> Permission: - if (uid, gid, settings) in self.perms_collection: - return self.perms_collection[(uid, gid, settings)] - perm = Permission(uid, gid, settings) - self.perms_collection[(uid, gid, settings)] = perm - return perm diff --git a/dsi/permissions/permissions.py b/dsi/permissions/permissions.py new file mode 100644 index 00000000..8b69ef99 --- /dev/null +++ b/dsi/permissions/permissions.py @@ -0,0 +1,78 @@ +from dataclasses import dataclass +from stat import S_IRWXU, S_IRWXG, S_IRWXO +from os import stat + + +@dataclass(eq=True, frozen=True) +class Permission: + """ A simple dataclass to represent POSIX file permissions """ + uid: int + gid: int + settings: str + + def __iter__(self): + """ enables conversion to tuple, 
list, etc. """ + for v in (self.uid, self.gid, self.settings): + yield v + + +class PermissionsManager: + """ + A class to handle and register the mapping from columns + to their permissions. Uses flyweights so that each unique + permission is shared and only stored in memory once. + """ + + def __init__(self): + self.perms_collection = {} + self.column_perms = {} + + def get_perm(self, uid, gid, settings) -> Permission: + """ If a perm already exists, return it. Else create it. """ + if (uid, gid, settings) in self.perms_collection: + return self.perms_collection[(uid, gid, settings)] + perm = Permission(uid, gid, settings) + self.perms_collection[(uid, gid, settings)] = perm + return perm + + def register_columns(self, keys: list[str], perm: Permission) -> None: + """ Links a list of column names to a given permission """ + if tuple(perm) not in self.perms_collection: + raise PermissionNotFoundError(perm) + for key in keys: + self.column_perms[key] = perm + + def register_columns_with_file(self, keys: list[str], fp: str) -> None: + uid, gid, settings = (None, None, None) if fp is None \ + else self.get_file_permissions(fp) + perm = self.get_perm(uid, gid, settings) + self.register_columns(keys, perm) + + def get_column_perms(self, key: str) -> Permission: + """ Get the Permission of a given column """ + try: + return self.column_perms[key] + except KeyError: + raise ColumnNotRegisteredError(key) + + def get_file_permissions(self, fpath: str) -> tuple[int, int, str]: + """ Given a filepath, returns (uid, gid, settings) """ + st = stat(fpath) # includes info on filetype, perms, etc. + uid = st.st_uid + gid = st.st_gid + perm_mask = S_IRWXU | S_IRWXG | S_IRWXO # user | group | other + settings = oct(st.st_mode & perm_mask) # select perm bits from st_mode + return (uid, gid, settings) + + +class PermissionNotFoundError(Exception): + def __init__(self, perm: Permission) -> None: + self.msg = f"Permission {perm} not found. 
Make sure to use get_perm instead of " + \ + "manually instantiating a Permission to register." + super().__init__(self.msg) + + +class ColumnNotRegisteredError(Exception): + def __init__(self, key: str) -> None: + self.msg = f"Permission for column {key} not registered. Be sure to `register_columns`." + super().__init__(self.msg) diff --git a/dsi/permissions/tests/test_permissions.py b/dsi/permissions/tests/test_permissions.py new file mode 100644 index 00000000..6fc7dd0f --- /dev/null +++ b/dsi/permissions/tests/test_permissions.py @@ -0,0 +1,37 @@ +import git +import os + +from dsi.core import Terminal + + +def get_git_root(path): + git_repo = git.Repo(path, search_parent_directories=True) + git_root = git_repo.git.rev_parse("--show-toplevel") + return (git_root) + + +def test_multiple_files_different_perms(): + term = Terminal() + bueno_path = '/'.join([get_git_root('.'), 'dsi/data', 'bueno.data']) + pq_path = '/'.join([get_git_root('.'), 'dsi/data', 'dummy_data.pq']) + os.chmod(bueno_path, 0o660) + os.chmod(pq_path, 0o664) + + term.load_module('plugin', 'Bueno', 'consumer', filename=bueno_path) + term.load_module('driver', 'Parquet', 'back-end', filename=pq_path) + term.artifact_handler(interaction_type='get') + term.transload() + + for env_col in ('uid', 'effective_gid', 'moniker', 'gid_list'): + assert tuple(term.perms_manager.column_perms[env_col]) == \ + (None, None, None) + + for bueno_col in ('foo', 'bar', 'baz'): + uid, gid, settings = tuple(term.perms_manager.column_perms[bueno_col]) + assert type(uid) == type(gid) == int + assert settings == '0o660' + + for pq_col in ('one', 'two', 'three'): + uid, gid, settings = tuple(term.perms_manager.column_perms[pq_col]) + assert type(uid) == type(gid) == int + assert settings == '0o664' diff --git a/dsi/plugins/env.py b/dsi/plugins/env.py index 3c2cdcd2..bd3b5c9c 100644 --- a/dsi/plugins/env.py +++ b/dsi/plugins/env.py @@ -17,8 +17,8 @@ class Environment(StructuredMetadata): information. 
""" - def __init__(self): - super().__init__() + def __init__(self, **kwargs): + super().__init__(**kwargs) # Get POSIX info self.posix_info = OrderedDict() self.posix_info['uid'] = os.getuid() @@ -38,12 +38,12 @@ class Hostname(Environment): """ def __init__(self, **kwargs) -> None: - super().__init__() + super().__init__(**kwargs) def pack_header(self) -> None: """Set schema with keys of prov_info.""" column_names = list(self.posix_info.keys()) + ["hostname"] - self.set_schema(column_names) + self.perms_manager.register_columns_with_file(column_names, None) def add_row(self) -> None: """Parses environment provenance data and adds the row.""" @@ -63,14 +63,19 @@ class Bueno(Environment): """ def __init__(self, filename, **kwargs) -> None: - super().__init__() + super().__init__(**kwargs) self.bueno_data = OrderedDict() self.filename = filename def pack_header(self) -> None: """Set schema with POSIX and Bueno data.""" - column_names = list(self.posix_info.keys()) + \ - list(self.bueno_data.keys()) + posix_columns = list(self.posix_info.keys()) + bueno_columns = list(self.bueno_data.keys()) + self.perms_manager.register_columns_with_file(posix_columns, None) + self.perms_manager.register_columns_with_file( + bueno_columns, self.filename) + + column_names = posix_columns + bueno_columns self.set_schema(column_names) def add_row(self) -> None: @@ -99,9 +104,9 @@ class GitInfo(Environment): Adds the current git remote and git commit to metadata. 
""" - def __init__(self, git_repo_path="./") -> None: + def __init__(self, git_repo_path="./", **kwargs) -> None: """ Initializes the git repo in the given directory and access to git commands """ - super().__init__() + super().__init__(**kwargs) try: self.repo = Repo(git_repo_path) except git.exc: @@ -118,6 +123,7 @@ def pack_header(self) -> None: column_names = list(self.posix_info.keys()) + \ list(self.git_info.keys()) self.set_schema(column_names) + self.perms_manager.register_columns_with_file(column_names, None) def add_row(self) -> None: """ Adds a row to the output with POSIX info, git remote, and git commit """ @@ -142,9 +148,9 @@ class SystemKernel(Environment): 6. Container information, if containerized """ - def __init__(self) -> None: + def __init__(self, **kwargs) -> None: """Initialize SystemKernel with inital provenance info.""" - super().__init__() + super().__init__(**kwargs) self.prov_info = self.get_prov_info() def pack_header(self) -> None: @@ -152,6 +158,7 @@ def pack_header(self) -> None: column_names = list(self.posix_info.keys()) + \ list(self.prov_info.keys()) self.set_schema(column_names) + self.perms_manager.register_columns_with_file(column_names, None) def add_row(self) -> None: """Parses environment provenance data and adds the row.""" diff --git a/dsi/plugins/metadata.py b/dsi/plugins/metadata.py index 3805e740..f918f5f7 100644 --- a/dsi/plugins/metadata.py +++ b/dsi/plugins/metadata.py @@ -31,20 +31,22 @@ def add_to_output(self, path): class StructuredMetadata(Plugin): """ plugin superclass that provides handy methods for structured data """ - git_commit_sha = '5d79e08d4a6c1570ceb47cdd61d2259505c05de9' + git_commit_sha: str = '5d79e08d4a6c1570ceb47cdd61d2259505c05de9' - def __init__(self): + def __init__(self, **kwargs): """ Initializes a StructuredDataPlugin with an output collector and an initially unset column count. 
""" self.output_collector = OrderedDict() self.column_cnt = None # schema not set until pack_header + self.perms_manager = kwargs['perms_manager'] def set_schema(self, column_names: list) -> None: """ Initializes columns in the output_collector and column_cnt. Useful in a plugin's pack_header method. + Also registers column permissions if filename is set. """ for name in column_names: self.output_collector[name] = [] From 6d9ccf3f22caa9997f0757fe79201cbc59c3fd2e Mon Sep 17 00:00:00 2001 From: Daniel Johnson Date: Tue, 18 Jul 2023 11:56:55 -0600 Subject: [PATCH 03/18] update plugin tests to use perms_manager --- dsi/plugins/env.py | 3 ++- dsi/plugins/tests/test_env.py | 31 +++++++++++++++++++++---------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/dsi/plugins/env.py b/dsi/plugins/env.py index bd3b5c9c..bdd410f5 100644 --- a/dsi/plugins/env.py +++ b/dsi/plugins/env.py @@ -43,6 +43,7 @@ def __init__(self, **kwargs) -> None: def pack_header(self) -> None: """Set schema with keys of prov_info.""" column_names = list(self.posix_info.keys()) + ["hostname"] + self.set_schema(column_names) self.perms_manager.register_columns_with_file(column_names, None) def add_row(self) -> None: @@ -109,7 +110,7 @@ def __init__(self, git_repo_path="./", **kwargs) -> None: super().__init__(**kwargs) try: self.repo = Repo(git_repo_path) - except git.exc: + except git.exc.InvalidGitRepositoryError: raise Exception(f"Git could not find .git/ in {git_repo_path}, " + "GitInfo Plugin must be given a repo base path " + "(default is working dir)") diff --git a/dsi/plugins/tests/test_env.py b/dsi/plugins/tests/test_env.py index fa44a918..b0677ca2 100644 --- a/dsi/plugins/tests/test_env.py +++ b/dsi/plugins/tests/test_env.py @@ -1,6 +1,7 @@ import collections from dsi.plugins.env import Hostname, SystemKernel, Bueno, GitInfo +from dsi.permissions.permissions import PermissionsManager import git @@ -11,14 +12,16 @@ def get_git_root(path): def test_hostname_plugin_type(): - a = 
Hostname() + mock_pm = PermissionsManager() + a = Hostname(perms_manager=mock_pm) a.add_row() a.add_row() assert type(a.output_collector) == collections.OrderedDict def test_hostname_plugin_col_shape(): - a = Hostname() + mock_pm = PermissionsManager() + a = Hostname(perms_manager=mock_pm) a.add_row() a.add_row() assert len(a.output_collector.keys()) == len(a.output_collector.values()) @@ -26,7 +29,8 @@ def test_hostname_plugin_col_shape(): def test_hostname_plugin_row_shape(): for row_cnt in range(1, 10): - a = Hostname() + mock_pm = PermissionsManager() + a = Hostname(perms_manager=mock_pm) for _ in range(row_cnt): a.add_row() column_values = list(a.output_collector.values()) @@ -36,12 +40,14 @@ def test_hostname_plugin_row_shape(): def test_envprov_plugin_type(): - plug = SystemKernel() + mock_pm = PermissionsManager() + plug = SystemKernel(perms_manager=mock_pm) assert type(plug.output_collector) == collections.OrderedDict def test_envprov_plugin_adds_rows(): - plug = SystemKernel() + mock_pm = PermissionsManager() + plug = SystemKernel(perms_manager=mock_pm) plug.add_row() plug.add_row() @@ -53,15 +59,17 @@ def test_envprov_plugin_adds_rows(): def test_bueno_plugin_type(): + mock_pm = PermissionsManager() path = '/'.join([get_git_root('.'), 'dsi/data', 'bueno.data']) - plug = Bueno(filename=path) + plug = Bueno(filename=path, perms_manager=mock_pm) plug.add_row() assert type(plug.output_collector) == collections.OrderedDict def test_bueno_plugin_adds_rows(): + mock_pm = PermissionsManager() path = '/'.join([get_git_root('.'), 'dsi/data', 'bueno.data']) - plug = Bueno(filename=path) + plug = Bueno(filename=path, perms_manager=mock_pm) plug.add_row() plug.add_row() @@ -73,15 +81,17 @@ def test_bueno_plugin_adds_rows(): def test_git_plugin_type(): + mock_pm = PermissionsManager() root = get_git_root('.') - plug = GitInfo(git_repo_path=root) + plug = GitInfo(git_repo_path=root, perms_manager=mock_pm) plug.add_row() assert type(plug.output_collector) == 
collections.OrderedDict def test_git_plugin_adds_rows(): + mock_pm = PermissionsManager() root = get_git_root('.') - plug = GitInfo(git_repo_path=root) + plug = GitInfo(git_repo_path=root, perms_manager=mock_pm) plug.add_row() plug.add_row() @@ -93,8 +103,9 @@ def test_git_plugin_adds_rows(): def test_git_plugin_infos_are_str(): + mock_pm = PermissionsManager() root = get_git_root('.') - plug = GitInfo(git_repo_path=root) + plug = GitInfo(git_repo_path=root, perms_manager=mock_pm) plug.add_row() assert type(plug.output_collector["git-remote"][0]) == str From aa809c330727a758646549e8baad50b190d4f9f8 Mon Sep 17 00:00:00 2001 From: Daniel Johnson Date: Tue, 18 Jul 2023 11:57:37 -0600 Subject: [PATCH 04/18] use get_git_root instead of cwd --- dsi/tests/test_core.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/dsi/tests/test_core.py b/dsi/tests/test_core.py index 9e1e6d5b..13b1af80 100644 --- a/dsi/tests/test_core.py +++ b/dsi/tests/test_core.py @@ -1,6 +1,13 @@ +import git from dsi.core import Terminal +def get_git_root(path): + git_repo = git.Repo(path, search_parent_directories=True) + git_root = git_repo.git.rev_parse("--show-toplevel") + return (git_root) + + def test_terminal_module_getter(): a = Terminal() plugins = a.list_available_modules('plugin') @@ -10,7 +17,8 @@ def test_terminal_module_getter(): def test_unload_module(): a = Terminal() - a.load_module('plugin', 'GitInfo', 'producer') + a.load_module('plugin', 'GitInfo', 'producer', + git_repo_path=get_git_root('.')) assert len(a.list_loaded_modules()['producer']) == 1 a.unload_module('plugin', 'GitInfo', 'producer') assert len(a.list_loaded_modules()['producer']) == 0 From 4e4c1ae9b757c03722be185b43f1ae5ca96d17dc Mon Sep 17 00:00:00 2001 From: Daniel Johnson Date: Tue, 18 Jul 2023 11:58:01 -0600 Subject: [PATCH 05/18] pass perms_manager into drivers, still need to use it --- dsi/drivers/gufi.py | 5 +++-- dsi/drivers/sqlite.py | 6 ++++-- dsi/drivers/tests/test_gufi.py | 5 
++++- dsi/drivers/tests/test_sqlite.py | 13 +++++++++---- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/dsi/drivers/gufi.py b/dsi/drivers/gufi.py index ff6bdbfb..ecddcdcf 100644 --- a/dsi/drivers/gufi.py +++ b/dsi/drivers/gufi.py @@ -31,7 +31,7 @@ class Gufi(Filesystem): column: column name from the DSI db to join on """ - def __init__(self, prefix, index, dbfile, table, column, verbose=False): + def __init__(self, prefix, index, dbfile, table, column, verbose=False, **kwargs): ''' prefix: prefix to GUFI commands index: directory with GUFI indexes @@ -41,7 +41,7 @@ def __init__(self, prefix, index, dbfile, table, column, verbose=False): verbose: print debugging statements or not ''' - super().__init__(dbfile) + super().__init__(dbfile, **kwargs) # prefix is the prefix to the GUFI installation self.prefix = prefix # index is the directory where the GUFI indexes are stored @@ -65,6 +65,7 @@ def get_artifacts(self, query): resout = self._run_gufi_query(query) if self.isVerbose: print(resout) + # TODO: Needs to register permissions for columns return resout diff --git a/dsi/drivers/sqlite.py b/dsi/drivers/sqlite.py index 1597c6b9..b8ee9f42 100644 --- a/dsi/drivers/sqlite.py +++ b/dsi/drivers/sqlite.py @@ -34,7 +34,8 @@ class Sqlite(Filesystem): con = None cur = None - def __init__(self, filename): + def __init__(self, filename, **kwargs): + super().__init__(filename, **kwargs) self.filename = filename self.con = sqlite3.connect(filename) self.cur = self.con.cursor() @@ -191,7 +192,8 @@ def get_artifact_list(self, isVerbose=False): # Returns reference from query def get_artifacts(self, query): - self.get_artifacts_list() + # TODO: Needs to register permissions by column + return self.get_artifacts_list() # Closes connection to server def close(self): diff --git a/dsi/drivers/tests/test_gufi.py b/dsi/drivers/tests/test_gufi.py index 83033be3..78ab9f91 100644 --- a/dsi/drivers/tests/test_gufi.py +++ b/dsi/drivers/tests/test_gufi.py @@ -1,4 +1,5 @@ from 
dsi.drivers.gufi import Gufi +from dsi.permissions.permissions import PermissionsManager isVerbose = False @@ -9,7 +10,9 @@ def test_artifact_query(): prefix = "/usr/local/bin" table = "sample" column = "sample_col" - store = Gufi(prefix, index, dbpath, table, column, isVerbose) + mock_pm = PermissionsManager() + store = Gufi(prefix, index, dbpath, table, column, + isVerbose, perms_manager=mock_pm) sqlstr = "select * from dsi_entries" rows = store.get_artifacts(sqlstr) store.close() diff --git a/dsi/drivers/tests/test_sqlite.py b/dsi/drivers/tests/test_sqlite.py index 14ae0f5b..7450ff70 100644 --- a/dsi/drivers/tests/test_sqlite.py +++ b/dsi/drivers/tests/test_sqlite.py @@ -1,6 +1,7 @@ import git from dsi.drivers.sqlite import Sqlite, DataType +from dsi.permissions.permissions import PermissionsManager isVerbose = True @@ -13,7 +14,8 @@ def get_git_root(path): def test_wildfire_data_sql_artifact(): dbpath = "wildfire.db" - store = Sqlite(dbpath) + mock_pm = PermissionsManager() + store = Sqlite(dbpath, perms_manager=mock_pm) store.close() # No error implies success assert True @@ -22,7 +24,8 @@ def test_wildfire_data_sql_artifact(): def test_wildfire_data_csv_artifact(): csvpath = '/'.join([get_git_root('.'), 'dsi/data/wildfiredata.csv']) dbpath = "wildfire.db" - store = Sqlite(dbpath) + mock_pm = PermissionsManager() + store = Sqlite(dbpath, perms_manager=mock_pm) store.put_artifacts_csv(csvpath, "simulation", isVerbose=isVerbose) store.close() # No error implies success @@ -32,7 +35,8 @@ def test_wildfire_data_csv_artifact(): def test_yosemite_data_csv_artifact(): csvpath = '/'.join([get_git_root('.'), 'dsi/data/yosemite5.csv']) dbpath = "yosemite.db" - store = Sqlite(dbpath) + mock_pm = PermissionsManager() + store = Sqlite(dbpath, perms_manager=mock_pm) store.put_artifacts_csv(csvpath, "vision", isVerbose=isVerbose) store.close() # No error implies success @@ -41,7 +45,8 @@ def test_yosemite_data_csv_artifact(): def test_artifact_query(): dbpath = "wildfire.db" 
- store = Sqlite(dbpath) + mock_pm = PermissionsManager() + store = Sqlite(dbpath, perms_manager=mock_pm) _ = store.get_artifact_list(isVerbose=isVerbose) data_type = DataType() data_type.name = "simulation" From 8be2327bf582357d9a22c0a7fb93394ec77bdd6a Mon Sep 17 00:00:00 2001 From: Daniel Johnson Date: Thu, 20 Jul 2023 09:34:08 -0600 Subject: [PATCH 06/18] change fileless perms to uid, egid, 660 --- dsi/permissions/permissions.py | 14 ++++++++++++-- dsi/permissions/tests/test_permissions.py | 5 +++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/dsi/permissions/permissions.py b/dsi/permissions/permissions.py index 8b69ef99..9ca20355 100644 --- a/dsi/permissions/permissions.py +++ b/dsi/permissions/permissions.py @@ -1,6 +1,6 @@ from dataclasses import dataclass from stat import S_IRWXU, S_IRWXG, S_IRWXO -from os import stat +from os import stat, getuid, getgid @dataclass(eq=True, frozen=True) @@ -43,7 +43,8 @@ def register_columns(self, keys: list[str], perm: Permission) -> None: self.column_perms[key] = perm def register_columns_with_file(self, keys: list[str], fp: str) -> None: - uid, gid, settings = (None, None, None) if fp is None \ + """ Gets a file's Permission and links it to the given columns """ + uid, gid, settings = self.get_process_permissions() if fp is None \ else self.get_file_permissions(fp) perm = self.get_perm(uid, gid, settings) self.register_columns(keys, perm) @@ -64,6 +65,15 @@ def get_file_permissions(self, fpath: str) -> tuple[int, int, str]: settings = oct(st.st_mode & perm_mask) # select perm bits from st_mode return (uid, gid, settings) + def get_process_permissions(self) -> tuple[int, int, str]: + """ + In the event of data not coming from a file, + default to (uid, egid, 660) + """ + uid = getuid() + egid = getgid() + return (uid, egid, "0o660") + class PermissionNotFoundError(Exception): def __init__(self, perm: Permission) -> None: diff --git a/dsi/permissions/tests/test_permissions.py 
b/dsi/permissions/tests/test_permissions.py index 6fc7dd0f..2d7dd7a8 100644 --- a/dsi/permissions/tests/test_permissions.py +++ b/dsi/permissions/tests/test_permissions.py @@ -23,8 +23,9 @@ def test_multiple_files_different_perms(): term.transload() for env_col in ('uid', 'effective_gid', 'moniker', 'gid_list'): - assert tuple(term.perms_manager.column_perms[env_col]) == \ - (None, None, None) + uid, gid, settings = tuple(term.perms_manager.column_perms[env_col]) + assert type(uid) == type(gid) == int + assert settings == '0o660' for bueno_col in ('foo', 'bar', 'baz'): uid, gid, settings = tuple(term.perms_manager.column_perms[bueno_col]) From 4d96c3761e6279cd5c8707075e9fd9acf5989038 Mon Sep 17 00:00:00 2001 From: Daniel Johnson Date: Fri, 21 Jul 2023 14:17:28 -0600 Subject: [PATCH 07/18] add driver file writing per permissions --- dsi/data/dummy_data.pq | Bin 1316 -> 0 bytes dsi/drivers/filesystem.py | 24 ++++++++++++++ dsi/drivers/parquet.py | 8 +++-- dsi/permissions/permissions.py | 26 ++++++++++++++- dsi/permissions/tests/test_permissions.py | 38 +++++++++++++++------- 5 files changed, 82 insertions(+), 14 deletions(-) delete mode 100644 dsi/data/dummy_data.pq diff --git a/dsi/data/dummy_data.pq b/dsi/data/dummy_data.pq deleted file mode 100644 index bc57a7ce984d4b38b4469a7e75564f39bc130738..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1316 zcmcIk!A{#i5FKw^*OhvR5WBJ^A9BG$Do{W~iYhpa1GE(&XjFv4EhJWgpd>Ych#TT- zdaO8cjC$%n^k+J=&dOF2gg9hm&Ayr4dHZJ9_7T?Bq%DhbeqRa+a{z8&L244@oK#Dc zFG@!`f#;f;3@KQglBN20B|%p}Tc103 zrzgjRs`b%&L=JKLc&uJ$bM@Q2s`aIHiwECnX@$c`H$jVtob=T(8l<{+vi^*orY>NY zn&vxOFJE%s6i|JW`yH8 None: + """ + Write out a given collection to multiple files, one per + unique permission. File are written with `write_func` + and those files are set to their corresponding permission. 
+ """ + f_map = self.get_output_file_mapping(f_basename, f_ext) + for f, cols in f_map.items(): # Write one file for each unique permission + coll = {col: collection[col] for col in cols} + write_func(coll, f) + self.perms_manager.set_file_permissions(f_map) + + def get_output_file_mapping(self, base_filename: str, file_ext: str) -> dict[str, list[str]]: + """ + Given a base filename and extention, returns a mapping from filename + to a list of corresponding columns. Each filename encodes permissions. + """ + perms_to_cols = self.perms_manager.get_permission_columnlist_mapping() + return {(base_filename + '-' + str(perm) + file_ext): cols + for perm, cols in perms_to_cols.items()} diff --git a/dsi/drivers/parquet.py b/dsi/drivers/parquet.py index 164b5c57..798a6240 100644 --- a/dsi/drivers/parquet.py +++ b/dsi/drivers/parquet.py @@ -30,8 +30,12 @@ def get_artifacts(self): def put_artifacts(self, collection): """Put artifacts into file at filename path.""" - table = pa.table(collection) - pq.write_table(table, self.filename, compression=self.compression) + def write_dict(collection, fname): + table = pa.table(collection) + pq.write_table(table, fname, compression=self.compression) + + self.write_files(collection, write_func=write_dict, + f_basename=self.filename[:-3], f_ext='.pq') @staticmethod def get_cmd_output(cmd: list) -> str: diff --git a/dsi/permissions/permissions.py b/dsi/permissions/permissions.py index 9ca20355..f7ae9074 100644 --- a/dsi/permissions/permissions.py +++ b/dsi/permissions/permissions.py @@ -1,6 +1,7 @@ +from collections import defaultdict from dataclasses import dataclass from stat import S_IRWXU, S_IRWXG, S_IRWXO -from os import stat, getuid, getgid +from os import stat, getuid, getgid, chown, chmod @dataclass(eq=True, frozen=True) @@ -15,6 +16,9 @@ def __iter__(self): for v in (self.uid, self.gid, self.settings): yield v + def __str__(self): + return f"{self.uid}-{self.gid}-{self.settings}" + class PermissionsManager: """ @@ -49,6 +53,15 
@@ def register_columns_with_file(self, keys: list[str], fp: str) -> None: perm = self.get_perm(uid, gid, settings) self.register_columns(keys, perm) + def get_permission_columnlist_mapping(self) -> dict[Permission, list[str]]: + """ + Returns a mapping from unique Permission to list of columns. + """ + mapping = defaultdict(list) + for col, perm in self.column_perms.items(): + mapping[perm].append(col) + return mapping + def get_column_perms(self, key: str) -> Permission: """ Get the Permission of a given column """ try: @@ -65,6 +78,17 @@ def get_file_permissions(self, fpath: str) -> tuple[int, int, str]: settings = oct(st.st_mode & perm_mask) # select perm bits from st_mode return (uid, gid, settings) + def set_file_permissions(self, file_mapping: dict[str, list[str]]) -> None: + """ + Given a mapping from filename to list of columns, set each file + to its column's permissions. (All columns of a file share perms) + """ + for filename, cols in file_mapping.items(): + f_perm = self.get_column_perms(cols[0]) # cols share same perms + uid, gid, settings = tuple(f_perm) + chown(filename, uid, gid) # type: ignore + chmod(filename, int(settings, base=8)) # type: ignore + def get_process_permissions(self) -> tuple[int, int, str]: """ In the event of data not coming from a file, diff --git a/dsi/permissions/tests/test_permissions.py b/dsi/permissions/tests/test_permissions.py index 2d7dd7a8..24c42d5d 100644 --- a/dsi/permissions/tests/test_permissions.py +++ b/dsi/permissions/tests/test_permissions.py @@ -1,7 +1,9 @@ import git import os +from glob import glob from dsi.core import Terminal +from dsi.permissions.permissions import PermissionsManager def get_git_root(path): @@ -13,13 +15,9 @@ def get_git_root(path): def test_multiple_files_different_perms(): term = Terminal() bueno_path = '/'.join([get_git_root('.'), 'dsi/data', 'bueno.data']) - pq_path = '/'.join([get_git_root('.'), 'dsi/data', 'dummy_data.pq']) - os.chmod(bueno_path, 0o660) - os.chmod(pq_path, 0o664) - 
+ os.chmod(bueno_path, 0o664) term.load_module('plugin', 'Bueno', 'consumer', filename=bueno_path) - term.load_module('driver', 'Parquet', 'back-end', filename=pq_path) - term.artifact_handler(interaction_type='get') + term.transload() for env_col in ('uid', 'effective_gid', 'moniker', 'gid_list'): @@ -30,9 +28,27 @@ def test_multiple_files_different_perms(): for bueno_col in ('foo', 'bar', 'baz'): uid, gid, settings = tuple(term.perms_manager.column_perms[bueno_col]) assert type(uid) == type(gid) == int - assert settings == '0o660' - - for pq_col in ('one', 'two', 'three'): - uid, gid, settings = tuple(term.perms_manager.column_perms[pq_col]) - assert type(uid) == type(gid) == int assert settings == '0o664' + + +def test_output_file_mapping(): + term = Terminal() + bueno_path = '/'.join([get_git_root('.'), 'dsi/data', 'bueno.data']) + os.chmod(bueno_path, 0o664) + + term.load_module('plugin', 'Bueno', 'consumer', filename=bueno_path) + term.transload() + + pq_path = '/'.join([get_git_root('.'), 'dsi/data', 'dummy_data.pq']) + term.load_module('driver', 'Parquet', 'back-end', filename=pq_path) + + term.artifact_handler(interaction_type='put') + + pm = PermissionsManager() + written_paths = glob( + "/".join([get_git_root('.'), 'dsi/data']) + "/dummy_data*.pq") + for path in written_paths: + uid, gid, settings = pm.get_file_permissions(path) + assert path.find(str(uid)) != -1 and \ + path.find(str(gid)) != -1 and \ + path.find(settings) != -1 From 4b83cd4554ef08f929ecca088e0ab989b1077b40 Mon Sep 17 00:00:00 2001 From: Daniel Johnson Date: Mon, 18 Dec 2023 12:34:25 -0600 Subject: [PATCH 08/18] rename for clarity --- dsi/drivers/filesystem.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/dsi/drivers/filesystem.py b/dsi/drivers/filesystem.py index bc082705..64b2caf7 100644 --- a/dsi/drivers/filesystem.py +++ b/dsi/drivers/filesystem.py @@ -27,7 +27,7 @@ def inspect_artifacts(self): class Filesystem(Driver): - 
git_commit_sha: str = '5d79e08d4a6c1570ceb47cdd61d2259505c05de9' + git_commit_sha: str = "5d79e08d4a6c1570ceb47cdd61d2259505c05de9" # Declare named types DOUBLE = "DOUBLE" STRING = "VARCHAR" @@ -57,9 +57,13 @@ def get_artifacts(self, query): def inspect_artifacts(self): pass - def write_files(self, collection, - write_func: Callable[[dict[str, list], str], None], - f_basename: str, f_ext: str) -> None: + def write_files( + self, + collection, + write_func: Callable[[dict[str, list], str], None], + f_basename: str, + f_ext: str, + ) -> None: """ Write out a given collection to multiple files, one per unique permission. File are written with `write_func` @@ -67,15 +71,19 @@ def write_files(self, collection, """ f_map = self.get_output_file_mapping(f_basename, f_ext) for f, cols in f_map.items(): # Write one file for each unique permission - coll = {col: collection[col] for col in cols} - write_func(coll, f) + col_to_data = {col: collection[col] for col in cols} + write_func(col_to_data, f) self.perms_manager.set_file_permissions(f_map) - def get_output_file_mapping(self, base_filename: str, file_ext: str) -> dict[str, list[str]]: + def get_output_file_mapping( + self, base_filename: str, file_ext: str + ) -> dict[str, list[str]]: """ Given a base filename and extention, returns a mapping from filename to a list of corresponding columns. Each filename encodes permissions. 
""" perms_to_cols = self.perms_manager.get_permission_columnlist_mapping() - return {(base_filename + '-' + str(perm) + file_ext): cols - for perm, cols in perms_to_cols.items()} + return { + (base_filename + "-" + str(perm) + file_ext): cols + for perm, cols in perms_to_cols.items() + } From e05b60f41ede3bf1631adefb8fc60a352f8d0635 Mon Sep 17 00:00:00 2001 From: Daniel Johnson Date: Mon, 18 Dec 2023 12:35:03 -0600 Subject: [PATCH 09/18] update permissions functionality --- dsi/core.py | 204 +++++++++++++++------- dsi/permissions/permissions.py | 39 +++-- dsi/permissions/tests/test_permissions.py | 83 ++++++--- 3 files changed, 233 insertions(+), 93 deletions(-) diff --git a/dsi/core.py b/dsi/core.py index 2e759049..11be33fb 100644 --- a/dsi/core.py +++ b/dsi/core.py @@ -4,9 +4,10 @@ from itertools import product from dsi.permissions.permissions import PermissionsManager +from dsi.drivers.filesystem import Filesystem -class Terminal(): +class Terminal: """ An instantiated Terminal is the DSI human/machine interface. @@ -14,42 +15,50 @@ class Terminal(): front-ends or back-ends. Plugins may be producers or consumers. See documentation for more information. 
""" - DRIVER_PREFIX = ['dsi.drivers'] - DRIVER_IMPLEMENTATIONS = ['gufi', 'sqlite', 'parquet'] - PLUGIN_PREFIX = ['dsi.plugins'] - PLUGIN_IMPLEMENTATIONS = ['env'] - VALID_PLUGINS = ['Hostname', 'SystemKernel', 'Bueno', 'GitInfo'] - VALID_DRIVERS = ['Gufi', 'Sqlite', 'Parquet'] + + DRIVER_PREFIX = ["dsi.drivers"] + DRIVER_IMPLEMENTATIONS = ["gufi", "sqlite", "parquet"] + PLUGIN_PREFIX = ["dsi.plugins"] + PLUGIN_IMPLEMENTATIONS = ["env"] + VALID_PLUGINS = ["Hostname", "SystemKernel", "Bueno", "GitInfo"] + VALID_DRIVERS = ["Gufi", "Sqlite", "Parquet"] VALID_MODULES = VALID_PLUGINS + VALID_DRIVERS - VALID_MODULE_FUNCTIONS = {'plugin': [ - 'producer', 'consumer'], 'driver': ['front-end', 'back-end']} - VALID_ARTIFACT_INTERACTION_TYPES = ['get', 'set', 'put', 'inspect'] + VALID_MODULE_FUNCTIONS = { + "plugin": ["producer", "consumer"], + "driver": ["front-end", "back-end"], + } + VALID_ARTIFACT_INTERACTION_TYPES = ["get", "set", "put", "inspect"] + + def __init__(self, allow_multiple_permissions=False, squash_permissions=False): + self.allow_multiple_permissions = allow_multiple_permissions + self.squash_permissions = squash_permissions - def __init__(self): # Helper function to get parent module names. 
def static_munge(prefix, implementations): - return (['.'.join(i) for i in product(prefix, implementations)]) + return [".".join(i) for i in product(prefix, implementations)] self.module_collection = {} - driver_modules = static_munge( - self.DRIVER_PREFIX, self.DRIVER_IMPLEMENTATIONS) - self.module_collection['driver'] = {} + driver_modules = static_munge(self.DRIVER_PREFIX, self.DRIVER_IMPLEMENTATIONS) + self.module_collection["driver"] = {} for module in driver_modules: - self.module_collection['driver'][module] = import_module(module) + self.module_collection["driver"][module] = import_module(module) - plugin_modules = static_munge( - self.PLUGIN_PREFIX, self.PLUGIN_IMPLEMENTATIONS) - self.module_collection['plugin'] = {} + plugin_modules = static_munge(self.PLUGIN_PREFIX, self.PLUGIN_IMPLEMENTATIONS) + self.module_collection["plugin"] = {} for module in plugin_modules: - self.module_collection['plugin'][module] = import_module(module) + self.module_collection["plugin"][module] = import_module(module) self.active_modules = {} - valid_module_functions_flattened = self.VALID_MODULE_FUNCTIONS['plugin'] + \ - self.VALID_MODULE_FUNCTIONS['driver'] + valid_module_functions_flattened = ( + self.VALID_MODULE_FUNCTIONS["plugin"] + + self.VALID_MODULE_FUNCTIONS["driver"] + ) for valid_function in valid_module_functions_flattened: self.active_modules[valid_function] = [] self.active_metadata = OrderedDict() - self.perms_manager = PermissionsManager() + self.perms_manager = PermissionsManager( + allow_multiple_permissions, squash_permissions + ) self.transload_lock = False def list_available_modules(self, mod_type): @@ -67,8 +76,9 @@ def list_available_modules(self, mod_type): for python_module, classlist in self.module_collection[mod_type].items(): # In the next line, both "class" and VALID_MODULES refer to DSI modules. 
class_collector.extend( - [x for x in dir(classlist) if x in self.VALID_MODULES]) - return (class_collector) + [x for x in dir(classlist) if x in self.VALID_MODULES] + ) + return class_collector def load_module(self, mod_type, mod_name, mod_function, **kwargs): """ @@ -79,16 +89,24 @@ def load_module(self, mod_type, mod_name, mod_function, **kwargs): We expect most users will work with module implementations rather than templates, but but all high level class abstractions are accessible with this method. """ - if self.transload_lock and mod_type == 'plugin': - print('Plugin module loading is prohibited after transload. No action taken.') + if self.transload_lock and mod_type == "plugin": + print( + "Plugin module loading is prohibited after transload. No action taken." + ) return if mod_function not in self.VALID_MODULE_FUNCTIONS[mod_type]: print( - 'Hint: Did you declare your Module Function in the Terminal Global vars?') + "Hint: Did you declare your Module Function in the Terminal Global vars?" + ) raise NotImplementedError - if mod_name in [obj.__class__.__name__ for obj in self.active_modules[mod_function]]: - print('{} {} already loaded as {}. Nothing to do.'.format( - mod_name, mod_type, mod_function)) + if mod_name in [ + obj.__class__.__name__ for obj in self.active_modules[mod_function] + ]: + print( + "{} {} already loaded as {}. Nothing to do.".format( + mod_name, mod_type, mod_function + ) + ) return # DSI Modules are Python classes. class_name = mod_name @@ -103,39 +121,50 @@ def load_module(self, mod_type, mod_name, mod_function, **kwargs): except AttributeError: continue if load_success: - print('{} {} {} loaded successfully.'.format( - mod_name, mod_type, mod_function)) + print( + "{} {} {} loaded successfully.".format(mod_name, mod_type, mod_function) + ) else: - print('Hint: Did you declare your Plugin/Driver in the Terminal Global vars?') + print( + "Hint: Did you declare your Plugin/Driver in the Terminal Global vars?" 
+ ) raise NotImplementedError def unload_module(self, mod_type, mod_name, mod_function): """ Unloads a DSI module from the active_modules collection """ - if self.transload_lock and mod_type == 'plugin': + if self.transload_lock and mod_type == "plugin": print( - 'Plugin module unloading is prohibited after transload. No action taken.') + "Plugin module unloading is prohibited after transload. No action taken." + ) return for i, mod in enumerate(self.active_modules[mod_function]): if mod.__class__.__name__ == mod_name: self.active_modules[mod_function].pop(i) - print("{} {} {} unloaded successfully.".format( - mod_name, mod_type, mod_function)) + print( + "{} {} {} unloaded successfully.".format( + mod_name, mod_type, mod_function + ) + ) return - print("{} {} {} could not be found in active_modules. No action taken.".format( - mod_name, mod_type, mod_function)) + print( + "{} {} {} could not be found in active_modules. No action taken.".format( + mod_name, mod_type, mod_function + ) + ) def add_external_python_module(self, mod_type, mod_name, mod_path): """ - Adds a given external, meaning not from the DSI repo, Python module to the module_collection. + Adds a given external, meaning not from the DSI repo, Python module to the module_collection Afterwards, load_module can be used to load a DSI module from the added Python module. - Note: mod_type is needed because each Python module should only implement plugins or drivers. 
+ Note: mod_type is needed because each Python module should only implement plugins or drivers For example, term = Terminal() - term.add_external_python_module('plugin', 'my_python_file', '/the/path/to/my_python_file.py') + term.add_external_python_module('plugin', 'my_python_file', + '/the/path/to/my_python_file.py') term.load_module('plugin', 'MyPlugin', 'consumer') term.list_loaded_modules() # includes MyPlugin """ @@ -148,7 +177,7 @@ def list_loaded_modules(self): These Plugins and Drivers are active or ready to execute a post-processing task. """ - return (self.active_modules) + return self.active_modules def transload(self, **kwargs): """ @@ -159,7 +188,8 @@ def transload(self, **kwargs): data sources to a single DSI Core Middleware data structure. """ selected_function_modules = dict( - (k, self.active_modules[k]) for k in ('producer', 'consumer')) + (k, self.active_modules[k]) for k in ("producer", "consumer") + ) # Note this transload supports plugin.env Environment types now. for module_type, objs in selected_function_modules.items(): for obj in objs: @@ -168,44 +198,100 @@ def transload(self, **kwargs): self.active_metadata[col_name] = col_metadata self.transload_lock = True - def artifact_handler(self, interaction_type, **kwargs): + def artifact_handler(self, interaction_type, **kwargs) -> bool: """ Store or retrieve using all loaded DSI Drivers with back-end functionality. A DSI Core Terminal may load zero or more Drivers with back-end storage functionality. Calling artifact_handler will execute all back-end functionality currently loaded, given the provided ``interaction_type``. + + Returns whether the interaction was successful or not. """ if interaction_type not in self.VALID_ARTIFACT_INTERACTION_TYPES: print( - 'Hint: Did you declare your artifact interaction type in the Terminal Global vars?') + "Hint: Did you declare your artifact interaction type in the Terminal Global vars?" 
+ ) raise NotImplementedError operation_success = False # Perform artifact movement first, because inspect implementation may rely on # self.active_metadata or some stored artifact. selected_function_modules = dict( - (k, self.active_modules[k]) for k in (['back-end'])) + (k, self.active_modules[k]) for k in (["back-end"]) + ) + + if interaction_type == "put" or interaction_type == "set": + should_continue = self.handle_permissions_interactions( + selected_function_modules + ) + if not should_continue: + return False + for module_type, objs in selected_function_modules.items(): for obj in objs: - if interaction_type == 'put' or interaction_type == 'set': - obj.put_artifacts( - collection=self.active_metadata, **kwargs) + if interaction_type == "put" or interaction_type == "set": + obj.put_artifacts(collection=self.active_metadata, **kwargs) operation_success = True - elif interaction_type == 'get': + elif interaction_type == "get": self.active_metadata.update(obj.get_artifacts(**kwargs)) operation_success = True - if interaction_type == 'inspect': + if interaction_type == "inspect": for module_type, objs in selected_function_modules.items(): for obj in objs: - obj.put_artifacts( - collection=self.active_metadata, **kwargs) + obj.put_artifacts(collection=self.active_metadata, **kwargs) self.active_metadata = obj.inspect_artifacts( - collection=self.active_metadata, **kwargs) + collection=self.active_metadata, **kwargs + ) operation_success = True if operation_success: - return + return operation_success else: print( - 'Hint: Did you implement a case for your artifact interaction in the \ - artifact_handler loop?') + "Hint: Did you implement a case for your artifact interaction in the \ + artifact_handler loop?" 
+ ) raise NotImplementedError + + def handle_permissions_interactions(self, backends) -> bool: + if ( + self.perms_manager.has_multiple_permissions() + and not self.allow_multiple_permissions + ): + print( + "Data has multiple permissions as shown here: \n" + + self.put_report(backends) + + "However, allow_multiple_permissions is not true.\n" + + "No action taken." + ) + return False + elif self.squash_permissions: + msg = ( + "WARNING: One file will be written, throwing out all " + + "permissions attached as shown below:\n" + + self.put_report(backends) + + "THIS SHOULD BE DONE WITH EXTREME CAUTION! Continue? (y/n): " + ) + if input(msg).lower().strip() != "y": + print("No action taken.") + return False + elif self.allow_multiple_permissions: + msg = ( + "WARNING: One file will be written for each POSIX permission " + + "present in read files as shown below:\n" + + self.put_report(backends) + + "Continue? (y/n)" + ) + if input(msg).lower().strip() != "y": + print("No action taken.") + return False + return True + + def put_report(self, backends) -> str: + report = "" + for fs in filter(lambda d: isinstance(d, Filesystem), backends): + perm_raw = self.perms_manager.get_file_permissions(fs.filename) + perm_registered = self.perms_manager.get_perm( + *perm_raw + ) # may be different if squash_permissions + report += fs.filename + " (" + str(perm_registered) + ")\n" + return report diff --git a/dsi/permissions/permissions.py b/dsi/permissions/permissions.py index f7ae9074..346e4262 100644 --- a/dsi/permissions/permissions.py +++ b/dsi/permissions/permissions.py @@ -6,13 +6,14 @@ @dataclass(eq=True, frozen=True) class Permission: - """ A simple dataclass to represent POSIX file permissions """ + """A simple dataclass to represent POSIX file permissions""" + uid: int gid: int settings: str def __iter__(self): - """ enables conversion to tuple, list, etc. 
""" + """enables conversion to tuple, list, etc.""" for v in (self.uid, self.gid, self.settings): yield v @@ -27,29 +28,38 @@ class PermissionsManager: permission is shared and only stored in memory once. """ - def __init__(self): + def __init__(self, allow_multiple_permissions=False, squash_permissions=False): self.perms_collection = {} self.column_perms = {} + self.allow_multiple_permissions = allow_multiple_permissions + self.squash_permissions = squash_permissions def get_perm(self, uid, gid, settings) -> Permission: - """ If a perm already exists, return it. Else create it. """ + """If a perm already exists, return it. Else create it.""" if (uid, gid, settings) in self.perms_collection: return self.perms_collection[(uid, gid, settings)] - perm = Permission(uid, gid, settings) + perm = ( + Permission(uid, gid, settings) + if not self.squash_permissions + else Permission(*self.get_process_permissions()) + ) self.perms_collection[(uid, gid, settings)] = perm return perm def register_columns(self, keys: list[str], perm: Permission) -> None: - """ Links a list of column names to a given permission """ + """Links a list of column names to a given permission""" if tuple(perm) not in self.perms_collection: raise PermissionNotFoundError(perm) for key in keys: self.column_perms[key] = perm def register_columns_with_file(self, keys: list[str], fp: str) -> None: - """ Gets a file's Permission and links it to the given columns """ - uid, gid, settings = self.get_process_permissions() if fp is None \ + """Gets a file's Permission and links it to the given columns""" + uid, gid, settings = ( + self.get_process_permissions() + if fp is None else self.get_file_permissions(fp) + ) perm = self.get_perm(uid, gid, settings) self.register_columns(keys, perm) @@ -63,14 +73,14 @@ def get_permission_columnlist_mapping(self) -> dict[Permission, list[str]]: return mapping def get_column_perms(self, key: str) -> Permission: - """ Get the Permission of a given column """ + """Get the 
Permission of a given column""" try: return self.column_perms[key] except KeyError: raise ColumnNotRegisteredError(key) def get_file_permissions(self, fpath: str) -> tuple[int, int, str]: - """ Given a filepath, returns (uid, gid, settings) """ + """Given a filepath, returns (uid, gid, settings)""" st = stat(fpath) # includes info on filetype, perms, etc. uid = st.st_uid gid = st.st_gid @@ -98,11 +108,16 @@ def get_process_permissions(self) -> tuple[int, int, str]: egid = getgid() return (uid, egid, "0o660") + def has_multiple_permissions(self) -> bool: + return len(self.perms_collection.keys()) > 1 + class PermissionNotFoundError(Exception): def __init__(self, perm: Permission) -> None: - self.msg = f"Permission {perm} not found. Make sure to use get_perm instead of " + \ - "manually instantiating a Permission to register." + self.msg = ( + f"Permission {perm} not found. Make sure to use get_perm instead of " + + "manually instantiating a Permission to register." + ) super().__init__(self.msg) diff --git a/dsi/permissions/tests/test_permissions.py b/dsi/permissions/tests/test_permissions.py index 24c42d5d..dbb243bf 100644 --- a/dsi/permissions/tests/test_permissions.py +++ b/dsi/permissions/tests/test_permissions.py @@ -9,46 +9,85 @@ def get_git_root(path): git_repo = git.Repo(path, search_parent_directories=True) git_root = git_repo.git.rev_parse("--show-toplevel") - return (git_root) + return git_root -def test_multiple_files_different_perms(): +def test_multiple_perms_fails_by_default(): term = Terminal() - bueno_path = '/'.join([get_git_root('.'), 'dsi/data', 'bueno.data']) + bueno_path = "/".join([get_git_root("."), "dsi/data", "bueno.data"]) os.chmod(bueno_path, 0o664) - term.load_module('plugin', 'Bueno', 'consumer', filename=bueno_path) + term.load_module("plugin", "Bueno", "consumer", filename=bueno_path) + term.transload() + assert not term.artifact_handler(interaction_type="put") + + +def test_multiple_permissions_register_correctly(): + term = 
Terminal(allow_multiple_permissions=True) + bueno_path = "/".join([get_git_root("."), "dsi/data", "bueno.data"]) + os.chmod(bueno_path, 0o664) + term.load_module("plugin", "Bueno", "consumer", filename=bueno_path) term.transload() - for env_col in ('uid', 'effective_gid', 'moniker', 'gid_list'): + for env_col in ("uid", "effective_gid", "moniker", "gid_list"): uid, gid, settings = tuple(term.perms_manager.column_perms[env_col]) - assert type(uid) == type(gid) == int - assert settings == '0o660' + assert isinstance(uid, int) + assert isinstance(gid, int) + assert settings == "0o660" - for bueno_col in ('foo', 'bar', 'baz'): + for bueno_col in ("foo", "bar", "baz"): uid, gid, settings = tuple(term.perms_manager.column_perms[bueno_col]) - assert type(uid) == type(gid) == int - assert settings == '0o664' + assert isinstance(uid, int) + assert isinstance(gid, int) + assert settings == "0o664" -def test_output_file_mapping(): - term = Terminal() - bueno_path = '/'.join([get_git_root('.'), 'dsi/data', 'bueno.data']) +def test_squash_permissions_register_correctly(): + term = Terminal(squash_permissions=True) + bueno_path = "/".join([get_git_root("."), "dsi/data", "bueno.data"]) + os.chmod(bueno_path, 0o664) + term.load_module("plugin", "Bueno", "consumer", filename=bueno_path) + + term.transload() + + for env_col in ("uid", "effective_gid", "moniker", "gid_list"): + uid, gid, settings = tuple(term.perms_manager.column_perms[env_col]) + assert isinstance(uid, int) + assert isinstance(gid, int) + assert settings == "0o660" + + for bueno_col in ("foo", "bar", "baz"): + uid, gid, settings = tuple(term.perms_manager.column_perms[bueno_col]) + assert isinstance(uid, int) + assert isinstance(gid, int) + assert settings == "0o660" + + +def test_permissions_output_correctly(monkeypatch): + monkeypatch.setattr("builtins.input", lambda _: "y") # mock input + term = Terminal(allow_multiple_permissions=True) + bueno_path = "/".join([get_git_root("."), "dsi/data", "bueno.data"]) 
os.chmod(bueno_path, 0o664) - term.load_module('plugin', 'Bueno', 'consumer', filename=bueno_path) + term.load_module("plugin", "Bueno", "consumer", filename=bueno_path) term.transload() - pq_path = '/'.join([get_git_root('.'), 'dsi/data', 'dummy_data.pq']) - term.load_module('driver', 'Parquet', 'back-end', filename=pq_path) + pq_path = "/".join([get_git_root("."), "dsi/data", "dummy_data.pq"]) + term.load_module("driver", "Parquet", "back-end", filename=pq_path) - term.artifact_handler(interaction_type='put') + term.artifact_handler(interaction_type="put") pm = PermissionsManager() - written_paths = glob( - "/".join([get_git_root('.'), 'dsi/data']) + "/dummy_data*.pq") + written_paths = glob("/".join([get_git_root("."), "dsi/data"]) + "/dummy_data*.pq") for path in written_paths: uid, gid, settings = pm.get_file_permissions(path) - assert path.find(str(uid)) != -1 and \ - path.find(str(gid)) != -1 and \ - path.find(settings) != -1 + if settings == "0o664": # the bueno file + old_uid, old_gid, old_settings = pm.get_file_permissions(bueno_path) + assert uid == old_uid + assert gid == old_gid + assert settings == old_settings + assert ( + path.find(str(uid)) != -1 + and path.find(str(gid)) != -1 + and path.find(settings) != -1 + ) From eb49e75581f437990556fed5b6c6d3edaf99827a Mon Sep 17 00:00:00 2001 From: qwofford Date: Tue, 19 Dec 2023 15:09:36 -0700 Subject: [PATCH 10/18] remaining merge cleanups --- dsi/plugins/env.py | 95 ----------------------------------- dsi/plugins/metadata.py | 3 -- dsi/plugins/tests/test_env.py | 9 +--- 3 files changed, 2 insertions(+), 105 deletions(-) diff --git a/dsi/plugins/env.py b/dsi/plugins/env.py index aca00ed6..27af35d7 100644 --- a/dsi/plugins/env.py +++ b/dsi/plugins/env.py @@ -45,11 +45,7 @@ def __init__(self, **kwargs) -> None: def pack_header(self) -> None: """Set schema with keys of prov_info.""" column_names = list(self.posix_info.keys()) + ["hostname"] -<<<<<<< HEAD self.set_schema(column_names) -======= - 
self.set_schema(column_names, validation_model=HostnameModel) ->>>>>>> 3055a541fac93aa877cb42e13273b3c3ec047877 self.perms_manager.register_columns_with_file(column_names, None) def add_rows(self) -> None: @@ -61,60 +57,6 @@ def add_rows(self) -> None: self.add_to_output(row) -<<<<<<< HEAD -class Bueno(Environment): - """ - A Plugin to capture performance data from Bueno (github.com/lanl/bueno) - - Bueno outputs performance data in keyvalue pairs in a file. Keys and values - are delimited by ``:``. Keyval pairs are delimited by ``\\n``. - """ - - def __init__(self, filename, **kwargs) -> None: - super().__init__(**kwargs) - self.bueno_data = OrderedDict() - self.filename = filename - - def pack_header(self) -> None: - """Set schema with POSIX and Bueno data.""" -<<<<<<< HEAD - posix_columns = list(self.posix_info.keys()) - bueno_columns = list(self.bueno_data.keys()) - self.perms_manager.register_columns_with_file(posix_columns, None) - self.perms_manager.register_columns_with_file( - bueno_columns, self.filename) - - column_names = posix_columns + bueno_columns - self.set_schema(column_names) -======= - bueno_names = list(self.bueno_data.keys()) - column_names = list(self.posix_info.keys()) + bueno_names - model = create_dynamic_model("BuenoModel", col_names=bueno_names, - col_types=[str] * len(bueno_names), base=EnvironmentModel) - self.set_schema(column_names, validation_model=model) ->>>>>>> main - - def add_row(self) -> None: - """Parses environment provenance data and adds the row.""" - if not self.schema_is_set(): - with open(self.filename, 'r') as fh: - file_content = (fh.read()) - rows = file_content.split('\n') - drop_rows = [row for row in rows if row != ''] - rows = drop_rows - for row in rows: - colon_split = row.split(':', maxsplit=1) - if len(colon_split) != 2: - raise TypeError - self.bueno_data[colon_split[0]] = colon_split[1] - self.pack_header() - - row = list(self.posix_info.values()) + list(self.bueno_data.values()) - self.add_to_output(row) - 
- -======= ->>>>>>> 3055a541fac93aa877cb42e13273b3c3ec047877 class GitInfo(Environment): """ A Plugin to capture Git information. @@ -139,22 +81,10 @@ def __init__(self, git_repo_path="./", **kwargs) -> None: } def pack_header(self) -> None: -<<<<<<< HEAD - """ Set schema with POSIX and Git columns """ - column_names = list(self.posix_info.keys()) + \ - list(self.git_info.keys()) -<<<<<<< HEAD - self.set_schema(column_names) - self.perms_manager.register_columns_with_file(column_names, None) -======= - self.set_schema(column_names, validation_model=GitInfoModel) ->>>>>>> main -======= """Set schema with POSIX and Git columns""" column_names = list(self.posix_info.keys()) + list(self.git_info.keys()) self.set_schema(column_names, validation_model=GitInfoModel) self.perms_manager.register_columns_with_file(column_names, None) ->>>>>>> 3055a541fac93aa877cb42e13273b3c3ec047877 def add_rows(self) -> None: """Adds a row to the output with POSIX info, git remote, and git commit""" @@ -167,7 +97,6 @@ def add_rows(self) -> None: ] self.add_to_output(row) - class SystemKernel(Environment): """ Plugin for reading environment provenance data. 
@@ -183,39 +112,15 @@ class SystemKernel(Environment): def __init__(self, **kwargs) -> None: """Initialize SystemKernel with inital provenance info.""" -<<<<<<< HEAD -<<<<<<< HEAD - super().__init__(**kwargs) - self.prov_info = self.get_prov_info() - - def pack_header(self) -> None: - """Set schema with keys of prov_info.""" - column_names = list(self.posix_info.keys()) + \ - list(self.prov_info.keys()) - self.set_schema(column_names) - self.perms_manager.register_columns_with_file(column_names, None) -======= - super().__init__() -======= super().__init__(**kwargs) self.prov_info = self.get_prov_info() ->>>>>>> 3055a541fac93aa877cb42e13273b3c3ec047877 self.column_names = ["kernel_info"] def pack_header(self) -> None: """Set schema with keys of prov_info.""" -<<<<<<< HEAD - prov_info_names = list(self.prov_info.keys()) - column_names = list(self.posix_info.keys()) + prov_info_names - model = create_dynamic_model("BuenoModel", col_names=prov_info_names, - col_types=[str] * len(prov_info_names), base=EnvironmentModel) - self.set_schema(column_names, validation_model=model) ->>>>>>> main -======= column_names = list(self.posix_info.keys()) + self.column_names self.set_schema(column_names, validation_model=SystemKernelModel) self.perms_manager.register_columns_with_file(column_names, None) ->>>>>>> 3055a541fac93aa877cb42e13273b3c3ec047877 def add_rows(self) -> None: """Parses environment provenance data and adds the row.""" diff --git a/dsi/plugins/metadata.py b/dsi/plugins/metadata.py index fc49626c..779251bd 100644 --- a/dsi/plugins/metadata.py +++ b/dsi/plugins/metadata.py @@ -41,8 +41,6 @@ def __init__(self, **kwargs): self.output_collector = OrderedDict() self.column_cnt = None # schema not set until pack_header self.perms_manager = kwargs['perms_manager'] -<<<<<<< HEAD -======= self.validation_model = None # optional pydantic Model # Check for strict_mode option if 'strict_mode' in kwargs: @@ -55,7 +53,6 @@ def __init__(self, **kwargs): self.strict_mode = False # 
Lock to enforce strict mode self.strict_mode_lock = False ->>>>>>> 3055a541fac93aa877cb42e13273b3c3ec047877 def set_schema(self, column_names: list, validation_model=None) -> None: """ diff --git a/dsi/plugins/tests/test_env.py b/dsi/plugins/tests/test_env.py index f8088fe4..880ebe1f 100644 --- a/dsi/plugins/tests/test_env.py +++ b/dsi/plugins/tests/test_env.py @@ -49,13 +49,8 @@ def test_envprov_plugin_type(): def test_envprov_plugin_adds_rows(): mock_pm = PermissionsManager() plug = SystemKernel(perms_manager=mock_pm) -<<<<<<< HEAD - plug.add_row() - plug.add_row() -======= plug.add_rows() plug.add_rows() ->>>>>>> 3055a541fac93aa877cb42e13273b3c3ec047877 for key, val in plug.output_collector.items(): assert len(val) == 2 @@ -72,8 +67,8 @@ def test_systemkernel_plugin_blob_is_big(): blob = plug.output_collector["kernel_info"][0] info_dict = loads(blob) - # 3 Bueno cols + 4 inherited Env cols - assert len(plug.output_collector.keys()) == 7 + # 1 systemkernel col + 4 inherited Env cols + assert len(plug.output_collector.keys()) == 5 # dict should have more than 1000 (~7000) keys assert len(info_dict.keys()) > 1000 From fb1e02c7969baea53bc61c397cbdd4b13e5f5631 Mon Sep 17 00:00:00 2001 From: qwofford Date: Tue, 19 Dec 2023 15:45:13 -0700 Subject: [PATCH 11/18] update documentation for permissions manager --- docs/index.rst | 2 +- docs/permissions.rst | 51 ++++++++++++++++++++++++++++++++++++++++++++ dsi/core.py | 1 - 3 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 docs/permissions.rst diff --git a/docs/index.rst b/docs/index.rst index 5b3df85d..ed68746f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -16,7 +16,7 @@ The Data Science Infrastructure Project (DSI) core plugins drivers - + permissions Indices and tables ================== diff --git a/docs/permissions.rst b/docs/permissions.rst new file mode 100644 index 00000000..3a7dafd3 --- /dev/null +++ b/docs/permissions.rst @@ -0,0 +1,51 @@ +Permissions +=================== +DSI is capable of 
consuming information from files, environments, and in-situ processes which may or may not have the same permissions authority. To track this information for the purposes of returning user queries into DSI storage, we utilize a permissions handler. The permissions handler bundles the authority by which information is read and adds this to each column data structure. Most relational database systems require that types are encforced by column, and DSI extends this idea to require that permissions are enforced by column. By tracking the permissions associated with each column, DSI can save files using the same POSIX permissions authority that initially granted access to the information, therefore preserving POSIX permssions as files are saved. + +By default, DSI will stop users from saving any data if the length of the union of the set of column permissions is greater than one. This prevents users from saving files that might have complex security implications. If a user enables the `allow_multiple_permissions` parameter of the `PermissionsManager`, then the number of files that will be saved is equal to the length of the union of the set of column permissions in the middelware data structures being written. There will be one file for each set of columns read by the same permissions authority. + +By default, DSI will always respect the POSIX security information by which information was read. If the usr wishes to override this behavior and write all of their metadata to the same file with a unified UID and GID, they can enable the `squash_permissions` perameter of the `PermissionsManager`. The user should be very certain that the information they are writing is protected appropriately in this case. 
+ +An example helps illustrate these scenarios: + +| Col A | Col B | Col C | +========================= +|Perm D |Perm D | Perm F| +|Row A1 |Row B1 | Row C1| +|Row A2 |Row B2 | Row C2| + +By default, DSI will refuse to write this data structure to disk because `len(union({D,D,F})) > 1` + +If a user enables the `allow_multiple_permissions` parameter, two files will be saved: + +>>> $ cat file1 +>>> | Col A | Col B | +>>> =================== +>>> | Perm D | Perm D | +>>> | Row A1 | Row B1 | +>>> | Row A2 | Row B2 | +>>> $ get_perms(file1) +>>> Perm D +>>> $ cat file2 +>>> | Col C | +>>> ========== +>>> | Perm F | +>>> | Row C1 | +>>> | Row C2 | +>>> $ get_perms(file2) +>>> Perm F + +If a user enables `allow_multiple_permissions` and `squash_permissions`, then a single file will be written with the users UID and effective GID and 660 access: + +>>> $ cat file +>>> | Col A | Col B | Col C | +>>> ============================ +>>> | Perm D | Perm D | Perm F | +>>> | Row A1 | Row B1 | Row C1 | +>>> | Row A2 | Row B2 | Row C2 | +>>> $ get_perms(file) +>>> My UID and Effective GID, with 660 access controls. + + +.. automodule:: dsi.permissions.permissions + :members: diff --git a/dsi/core.py b/dsi/core.py index 717a3eff..08203b3c 100644 --- a/dsi/core.py +++ b/dsi/core.py @@ -14,7 +14,6 @@ class Terminal: front-ends or back-ends. Plugins may be producers or consumers. See documentation for more information. 
""" - DRIVER_PREFIX = ["dsi.drivers"] DRIVER_IMPLEMENTATIONS = ["gufi", "sqlite", "parquet"] PLUGIN_PREFIX = ["dsi.plugins"] From 2da580457473e6efc9dc7bce53e895a0f07a0583 Mon Sep 17 00:00:00 2001 From: qwofford Date: Tue, 19 Dec 2023 16:36:36 -0700 Subject: [PATCH 12/18] typos --- docs/permissions.rst | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/docs/permissions.rst b/docs/permissions.rst index 3a7dafd3..b226bcf6 100644 --- a/docs/permissions.rst +++ b/docs/permissions.rst @@ -1,22 +1,26 @@ Permissions =================== -DSI is capable of consuming information from files, environments, and in-situ processes which may or may not have the same permissions authority. To track this information for the purposes of returning user queries into DSI storage, we utilize a permissions handler. The permissions handler bundles the authority by which information is read and adds this to each column data structure. Most relational database systems require that types are encforced by column, and DSI extends this idea to require that permissions are enforced by column. By tracking the permissions associated with each column, DSI can save files using the same POSIX permissions authority that initially granted access to the information, therefore preserving POSIX permssions as files are saved. +DSI is capable of consuming information from files, environments, and in-situ processes which may or may not have the same permissions authority. To track this information for the purposes of returning user queries into DSI storage, we utilize a permissions handler. The permissions handler bundles the authority by which information is read and adds this to each column data structure. Most relational database systems require that types are enforced by column, and DSI extends this idea to require that permissions are enforced by column. 
By tracking the permissions associated with each column, DSI can save files using the same POSIX permissions authority that initially granted access to the information, therefore preserving POSIX permssions as files are saved. -By default, DSI will stop users from saving any data if the length of the union of the set of column permissions is greater than one. This prevents users from saving files that might have complex security implications. If a user enables the `allow_multiple_permissions` parameter of the `PermissionsManager`, then the number of files that will be saved is equal to the length of the union of the set of column permissions in the middelware data structures being written. There will be one file for each set of columns read by the same permissions authority. +By default, DSI will stop users from saving any metadata if the length of the union of the set of column permissions is greater than one. This prevents users from saving files that might have complex security implications. If a user enables the ``allow_multiple_permissions`` parameter of the ``PermissionsManager``, then the number of files that will be saved is equal to the length of the union of the set of column permissions in the middelware data structures being written. There will be one file for each set of columns read by the same permissions authority. -By default, DSI will always respect the POSIX security information by which information was read. If the usr wishes to override this behavior and write all of their metadata to the same file with a unified UID and GID, they can enable the `squash_permissions` perameter of the `PermissionsManager`. The user should be very certain that the information they are writing is protected appropriately in this case. +By default, DSI will always respect the POSIX security information by which information was read. 
If the usr wishes to override this behavior and write all of their metadata to the same file with a unified UID and GID, they can enable the ``squash_permissions`` perameter of the ``PermissionsManager``. The user should be very certain that the information they are writing is protected appropriately in this case. An example helps illustrate these scenarios: -| Col A | Col B | Col C | -========================= -|Perm D |Perm D | Perm F| -|Row A1 |Row B1 | Row C1| -|Row A2 |Row B2 | Row C2| ++----------+----------+----------+ +| Col A | Col B | Col C | ++----------+----------+----------+ +| *Perm D* | *Perm D* | *Perm F* | ++----------+----------+----------+ +| Row A1 | Row B1 | Row C1 | ++----------+----------+----------+ +| Row A2 | Row B2 | Row C2 | ++----------+----------+----------+ -By default, DSI will refuse to write this data structure to disk because `len(union({D,D,F})) > 1` +By default, DSI will refuse to write this data structure to disk because ``len(union({D,D,F})) > 1`` -If a user enables the `allow_multiple_permissions` parameter, two files will be saved: +If a user enables the ``allow_multiple_permissions`` parameter, two files will be saved: >>> $ cat file1 >>> | Col A | Col B | @@ -35,7 +39,7 @@ If a user enables the `allow_multiple_permissions` parameter, two files will be >>> $ get_perms(file2) >>> Perm F -If a user enables `allow_multiple_permissions` and `squash_permissions`, then a single file will be written with the users UID and effective GID and 660 access: +If a user enables ``allow_multiple_permissions`` and ``squash_permissions``, then a single file will be written with the users UID and effective GID and 660 access: >>> $ cat file >>> | Col A | Col B | Col C | From a5dd7a33aaa0e84e393c0b8a8dacf786226abae6 Mon Sep 17 00:00:00 2001 From: qwofford Date: Tue, 19 Dec 2023 16:38:06 -0700 Subject: [PATCH 13/18] editing for clarity --- docs/permissions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
a/docs/permissions.rst b/docs/permissions.rst index b226bcf6..a08e8638 100644 --- a/docs/permissions.rst +++ b/docs/permissions.rst @@ -2,7 +2,7 @@ Permissions =================== DSI is capable of consuming information from files, environments, and in-situ processes which may or may not have the same permissions authority. To track this information for the purposes of returning user queries into DSI storage, we utilize a permissions handler. The permissions handler bundles the authority by which information is read and adds this to each column data structure. Most relational database systems require that types are enforced by column, and DSI extends this idea to require that permissions are enforced by column. By tracking the permissions associated with each column, DSI can save files using the same POSIX permissions authority that initially granted access to the information, therefore preserving POSIX permssions as files are saved. -By default, DSI will stop users from saving any metadata if the length of the union of the set of column permissions is greater than one. This prevents users from saving files that might have complex security implications. If a user enables the ``allow_multiple_permissions`` parameter of the ``PermissionsManager``, then the number of files that will be saved is equal to the length of the union of the set of column permissions in the middelware data structures being written. There will be one file for each set of columns read by the same permissions authority. +By default, DSI will stop users from saving any metadata if the length of the union of the set of column permissions is greater than one. This prevents users from saving files that might have complex security implications. 
If a user enables the ``allow_multiple_permissions`` parameter of the ``PermissionsManager``, then the number of files that will be saved is equal to the length of the union of the set of column permissions in the middleware data structures being written (an example of this behavior follows). There will be one file for each set of columns read by the same permissions authority. By default, DSI will always respect the POSIX security information by which information was read. If the usr wishes to override this behavior and write all of their metadata to the same file with a unified UID and GID, they can enable the ``squash_permissions`` perameter of the ``PermissionsManager``. The user should be very certain that the information they are writing is protected appropriately in this case. From 09fdeda4063dcb07001da56ef9665416620e0057 Mon Sep 17 00:00:00 2001 From: qwofford Date: Tue, 19 Dec 2023 16:44:57 -0700 Subject: [PATCH 14/18] editing for clarity and typos --- docs/permissions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/permissions.rst b/docs/permissions.rst index a08e8638..a488dbab 100644 --- a/docs/permissions.rst +++ b/docs/permissions.rst @@ -4,7 +4,7 @@ DSI is capable of consuming information from files, environments, and in-situ pr By default, DSI will stop users from saving any metadata if the length of the union of the set of column permissions is greater than one. This prevents users from saving files that might have complex security implications. If a user enables the ``allow_multiple_permissions`` parameter of the ``PermissionsManager``, then the number of files that will be saved is equal to the length of the union of the set of column permissions in the middleware data structures being written (an example of this behavior follows). There will be one file for each set of columns read by the same permissions authority. -By default, DSI will always respect the POSIX security information by which information was read. 
If the usr wishes to override this behavior and write all of their metadata to the same file with a unified UID and GID, they can enable the ``squash_permissions`` perameter of the ``PermissionsManager``. The user should be very certain that the information they are writing is protected appropriately in this case. +By default, DSI will always respect the POSIX permissions authority by which information was read. If the user wishes to override this behavior and write all of their metadata to the same file with a unified UID and GID, they can enable the ``squash_permissions`` perameter of the ``PermissionsManager``. The user should be very certain that the information they are writing is protected appropriately in this case. An example helps illustrate these scenarios: From 3fcfcc765441f59a8899ca582a5b0341b1a59ea4 Mon Sep 17 00:00:00 2001 From: qwofford Date: Tue, 19 Dec 2023 16:46:31 -0700 Subject: [PATCH 15/18] typo --- docs/permissions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/permissions.rst b/docs/permissions.rst index a488dbab..ddf975d3 100644 --- a/docs/permissions.rst +++ b/docs/permissions.rst @@ -4,7 +4,7 @@ DSI is capable of consuming information from files, environments, and in-situ pr By default, DSI will stop users from saving any metadata if the length of the union of the set of column permissions is greater than one. This prevents users from saving files that might have complex security implications. If a user enables the ``allow_multiple_permissions`` parameter of the ``PermissionsManager``, then the number of files that will be saved is equal to the length of the union of the set of column permissions in the middleware data structures being written (an example of this behavior follows). There will be one file for each set of columns read by the same permissions authority. -By default, DSI will always respect the POSIX permissions authority by which information was read. 
If the user wishes to override this behavior and write all of their metadata to the same file with a unified UID and GID, they can enable the ``squash_permissions`` perameter of the ``PermissionsManager``. The user should be very certain that the information they are writing is protected appropriately in this case. +By default, DSI will always respect the POSIX permissions authority by which information was read. If the user wishes to override this behavior and write all of their metadata to the same file with a unified UID and GID, they can enable the ``squash_permissions`` parameter of the ``PermissionsManager``. The user should be very certain that the information they are writing is protected appropriately in this case. An example helps illustrate these scenarios: From c76c45d16437d86b8a54aacdfa52c54749320697 Mon Sep 17 00:00:00 2001 From: qwofford Date: Tue, 19 Dec 2023 16:49:03 -0700 Subject: [PATCH 16/18] make users possessive --- docs/permissions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/permissions.rst b/docs/permissions.rst index ddf975d3..5a2cbfbf 100644 --- a/docs/permissions.rst +++ b/docs/permissions.rst @@ -39,7 +39,7 @@ If a user enables the ``allow_multiple_permissions`` parameter, two files will b >>> $ get_perms(file2) >>> Perm F -If a user enables ``allow_multiple_permissions`` and ``squash_permissions``, then a single file will be written with the users UID and effective GID and 660 access: +If a user enables ``allow_multiple_permissions`` and ``squash_permissions``, then a single file will be written with the user's UID and effective GID and 660 access: >>> $ cat file >>> | Col A | Col B | Col C | From a5cfabbe1063a24779289923613a3e6d4d759223 Mon Sep 17 00:00:00 2001 From: qwofford Date: Tue, 19 Dec 2023 16:51:04 -0700 Subject: [PATCH 17/18] change author from Terry to Triad --- docs/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 
59f6672e..da88c1ca 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -7,8 +7,8 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information project = 'DSI' -copyright = '2023, Terry Turton' -author = 'Terry Turton' +copyright = '2023, Triad National Security, LLC' +author = 'Triad National Security, LLC' release = '0.0.0' # -- General configuration --------------------------------------------------- From f2eca7b69b6214dac19ec245cd4987be32d05df7 Mon Sep 17 00:00:00 2001 From: qwofford Date: Tue, 19 Dec 2023 16:53:19 -0700 Subject: [PATCH 18/18] fix indentation for sphinx docs --- dsi/core.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dsi/core.py b/dsi/core.py index 08203b3c..6bd72cfc 100644 --- a/dsi/core.py +++ b/dsi/core.py @@ -162,8 +162,7 @@ def add_external_python_module(self, mod_type, mod_name, mod_path): For example, term = Terminal() - term.add_external_python_module('plugin', 'my_python_file', - '/the/path/to/my_python_file.py') + term.add_external_python_module('plugin', 'my_python_file', '/the/path/to/my_python_file.py') term.load_module('plugin', 'MyPlugin', 'consumer')